{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T19:00:30Z","timestamp":1767034830077,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"European Union","award":["01017743, RIA action of Horizon 2020"],"award-info":[{"award-number":["01017743, RIA action of Horizon 2020"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,4]]},"DOI":"10.1145\/3678957.3685740","type":"proceedings-article","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T04:35:53Z","timestamp":1730262953000},"page":"321-330","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Is Distance a Modality? Multi-Label Learning for Speech-Based Joint Prediction of Attributed Traits and Perceived Distances in 3D Audio Immersive Environments"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-9642-660X","authenticated-orcid":false,"given":"Eva","family":"Fringi","sequence":"first","affiliation":[{"name":"University of Glasgow, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0195-5807","authenticated-orcid":false,"given":"Nesreen","family":"Alshubaily","sequence":"additional","affiliation":[{"name":"University of Glasgow, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9297-2613","authenticated-orcid":false,"given":"Lorenzo","family":"Picinali","sequence":"additional","affiliation":[{"name":"Dyson School of Design Engineering, Imperial College London, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9720-3899","authenticated-orcid":false,"given":"Stephen Anthony","family":"Brewster","sequence":"additional","affiliation":[{"name":"School of Computing Science, University of Glasgow, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2167-4891","authenticated-orcid":false,"given":"Tanaya","family":"Guha","sequence":"additional","affiliation":[{"name":"School of Computing Science, University of Glasgow, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9048-0524","authenticated-orcid":false,"given":"Alessandro","family":"Vinciarelli","sequence":"additional","affiliation":[{"name":"School of Computing Science, University of Glasgow, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,11,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.24251\/HICSS.2022.524"},{"volume-title":"Proceedings of Interspeech. 421\u2013425","author":"An G.","key":"e_1_3_2_1_2_1","unstructured":"G. An, S.I. Levitan, J. Hirschberg, and R. Levitan. 2018. Deep Personality Recognition for Deception Detection.. In Proceedings of Interspeech. 421\u2013425."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2014.01097"},{"key":"e_1_3_2_1_4_1","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski A.","year":"2020","unstructured":"A. Baevski, Y. Zhou, A. Mohamed, and M. Auli. 2020. wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in Neural Information Processing Systems 33 (2020), 12449\u201312460.","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"Proceedings of the IEEE International Conference on Robot and Human Interactive Communication. 1\u20136.","author":"Banks J.","key":"e_1_3_2_1_5_1","unstructured":"J. Banks and A. Edwards. 2019. A common social distance scale for robots and humans. In Proceedings of the IEEE International Conference on Robot and Human Interactive Communication. 1\u20136."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1177\/0265407597143008"},{"volume-title":"Proceedings of Interspeech, Vol.\u00a02021","author":"Cai X.","key":"e_1_3_2_1_7_1","unstructured":"X. Cai, J. Yuan, R. Zheng, L. Huang, and K. Church. 2021. Speech emotion recognition with multi-task learning.. In Proceedings of Interspeech, Vol.\u00a02021. Brno, 4508\u20134512."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2763132"},{"key":"e_1_3_2_1_9_1","unstructured":"R. Collobert J. Weston L. Bottou M. Karlen K. Kavukcuoglu and P.P. Kuksa. 2011. Natural Language Processing (almost) from Scratch. CoRR abs\/1103.0398 (2011)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0211899"},{"key":"e_1_3_2_1_11_1","volume-title":"Nonverbal behavior and self-presentation.Psychological Bulletin 111, 2","author":"DePaulo B.M.","year":"1992","unstructured":"B.M. DePaulo. 1992. Nonverbal behavior and self-presentation.Psychological Bulletin 111, 2 (1992), 203."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1121\/10.0003437"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.psych.52.1.197"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"L.H. Gilpin D.M. Olson and T. Alrashed. 2018. Perception of Speaker Personality Traits Using Speech Signals. In Extended Abstracts of CHI. 1\u20136.","DOI":"10.1145\/3170427.3188557"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2019.01.027"},{"volume-title":"The Silent Language","author":"Hall E.T.","key":"e_1_3_2_1_16_1","unstructured":"E.T. Hall. 1990. The Silent Language. Knopf Doubleday Publishing Group."},{"key":"e_1_3_2_1_17_1","unstructured":"H. Hayat C. Ventura and A. Lapedriza. 2019. On the use of interpretable CNN for personality trait recognition from audio. In Artificial Intelligence Research and Development. 135\u2013144."},{"key":"e_1_3_2_1_18_1","unstructured":"D.C. Howell. 2009. Statistical Methods for Psychology. Cengage Learning."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581085"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1177\/1747021819865833"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-016-9390-0"},{"key":"e_1_3_2_1_22_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma D.P.","year":"2014","unstructured":"D.P. Kingma and J. Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_23_1","unstructured":"M.L. Knapp and J.A. Hall. 1972. Nonverbal Communication in Human Interaction. Harcourt Brace College Publishers."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.3758\/s13414-015-1015-1"},{"volume-title":"Proceedings of Interspeech. 5197\u2013520","author":"Li Y.","key":"e_1_3_2_1_25_1","unstructured":"Y. Li, P. Bell, and C. Lai. 2023. Transfer Learning for Personality Perception via Speech Emotion Recognition. In Proceedings of Interspeech. 5197\u2013520."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.04.049"},{"volume-title":"Proceedings of Interspeech. 2803\u20132807","author":"Li Y.","key":"e_1_3_2_1_27_1","unstructured":"Y. Li, T. Zhao, and T. Kawahara. 2019. Improved End-to-End Speech Emotion Recognition Using Self Attention Mechanism and Multitask Learning.. In Proceedings of Interspeech. 2803\u20132807."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2013.6694234"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3025108"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"G. Matthews I.J. Deary and M.C. Whiteman. 2003. Personality Traits. Cambridge University Press.","DOI":"10.1017\/CBO9780511812736"},{"volume-title":"The Five-Factor Model of Personality","author":"McCrae R.R.","key":"e_1_3_2_1_31_1","unstructured":"R.R. McCrae. 2009. The Five-Factor Model of Personality. In The Cambridge handbook of personality psychology, P.J. Corr and G.\u00a0Matthews (Eds.). Cambridge University Press, 148\u2013161."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-019-09770-z"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2012.5"},{"volume-title":"Virtual Society: The Metaverse and the New Frontiers of Human Experience","author":"Narula H.","key":"e_1_3_2_1_34_1","unstructured":"H. Narula. 2022. Virtual Society: The Metaverse and the New Frontiers of Human Experience. Random House."},{"volume-title":"Wired for speech: How voice activates and advances the Human-Computer relationship","author":"Nass C","key":"e_1_3_2_1_35_1","unstructured":"C Nass and S Brave. 2005. Wired for speech: How voice activates and advances the Human-Computer relationship. The MIT Press."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.283.5406.1272"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1086\/431246"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"L. Picinali. 2009. 3D Sound Simulation over Headphones. In Handbook of Research on Computational Arts and Creative Informatics. IGI Global 113\u2013131.","DOI":"10.4018\/978-1-60566-352-4.ch007"},{"volume-title":"Proceedings of the Audio Engineering Society International Conference on Immersive and Interactive Audio.","author":"Picinali L.","key":"e_1_3_2_1_39_1","unstructured":"L. Picinali, R. Hrafnkelsson, and A. Reyes-Lecuona. 2019. The 3D Tune-In Toolkit VST binaural audio plugin. In Proceedings of the Audio Engineering Society International Conference on Immersive and Interactive Audio."},{"key":"e_1_3_2_1_40_1","volume-title":"System-to-user and user-to-system adaptations in binaural audio. Sonic Interactions in Virtual Environments","author":"Picinali L.","year":"2023","unstructured":"L. Picinali and B.F.G. Katz. 2023. System-to-user and user-to-system adaptations in binaural audio. Sonic Interactions in Virtual Environments (2023), 115\u2013143."},{"volume-title":"Proceedings of Interspeech. 258\u2013261","author":"Polzehl T.","key":"e_1_3_2_1_41_1","unstructured":"T. Polzehl, K. Schoenenberg, S. M\u00f6ller, F. Metze, G. Mohammadi, and A. Vinciarelli. 2012. On speaker-independent personality perception and prediction from speech. In Proceedings of Interspeech. 258\u2013261."},{"key":"e_1_3_2_1_42_1","unstructured":"B. Reeves and C. Nass. 1996. The media equation: How people treat computers television and new media like real people and places. Cambridge University Press New York (USA)."},{"key":"e_1_3_2_1_43_1","unstructured":"V.P. Richmond and J.C. McCroskey. 1995. Nonverbal Behavior in Interpersonal Relations. Allyn and Bacon."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"R. Rosenthal. 2005. Conducting judgment studies: Some methodological issues. In The New Handbook of Methods in Nonverbal Behavior Research J.A. Harrigan R.\u00a0Rosenthal and K.R. Scherer (Eds.). 199\u2013234.","DOI":"10.1093\/oso\/9780198529613.003.0005"},{"key":"e_1_3_2_1_45_1","volume-title":"An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098","author":"Ruder S.","year":"2017","unstructured":"S. Ruder. 2017. An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098 (2017)."},{"key":"e_1_3_2_1_46_1","unstructured":"G. Saucier and L.R. Goldberg. 1996. The language of personality: Lexical Perspectives on the Five-Factor Model. In The Five-Factor Model of Personality J.S. Wiggins (Ed.)."},{"key":"e_1_3_2_1_47_1","volume-title":"Body torque. Social Research","author":"Schegloff E.A.","year":"1998","unstructured":"E.A. Schegloff. 1998. Body torque. Social Research (1998), 535\u2013596."},{"volume-title":"Proceedings of Interspeech.","author":"Schuller B.","key":"e_1_3_2_1_48_1","unstructured":"B. Schuller, S. Steidl, A. Batliner, E. Noth, A. Vinciarelli, F. Burkhardt, R.\u00a0V. Son, F. Weninger, F. Eyben, T. Bocklet, G. Mohammadi, and B. Weiss. 2012. The INTERSPEECH 2012 Speaker Trait Challenge. In Proceedings of Interspeech."},{"volume-title":"Proceedings of Interspeech. 929\u2013933","author":"Solera-Ure\u00f1a R.","key":"e_1_3_2_1_49_1","unstructured":"R. Solera-Ure\u00f1a, H. Moniz, F. Batista, R. Cabarr\u00e3o, A. Pompili, R. Astudillo, J. Campos, A. Paiva, and I. Trancoso. 2017. A semi-supervised learning approach for acoustic-prosodic personality perception in under-resourced domains. In Proceedings of Interspeech. 929\u2013933."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/IWSSIP55020.2022.9854439"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2017.8282287"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11370-008-0017-4"},{"volume-title":"Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing. 7268\u20137272","author":"Triantafyllopoulos A.","key":"e_1_3_2_1_53_1","unstructured":"A. Triantafyllopoulos and B.W. Schuller. 2021. The role of task and acoustic similarity in audio transfer learning: Insights from the speech emotion recognition case. In Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing. 7268\u20137272."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.psych.59.103006.093707"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2330816"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"J. Wagner A. Triantafyllopoulos H. Wierstorf M. Schmitt F. Burkhardt F. Eyben and B.W. Schuller. 2023. Dawn of the transformer era in speech emotion recognition: closing the valence gap. IEEE Transactions on Pattern Analysis and Machine Intelligence (2023).","DOI":"10.1109\/TPAMI.2023.3263585"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.3390\/s22166206"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3076820"},{"key":"e_1_3_2_1_59_1","first-page":"197","article-title":"Automatic personality recognition and perception using deep learning and supervised evaluation method","volume":"9","author":"Zaferani E.J.","year":"2022","unstructured":"E.J. Zaferani, M. Teshnehlab, and M. Vali. 2022. Automatic personality recognition and perception using deep learning and supervised evaluation method. Journal of Applied Research on Industrial Engineering 9, 2 (2022), 197\u2013211.","journal-title":"Journal of Applied Research on Industrial Engineering"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3070203"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2022.839619"},{"volume-title":"Proceedings of the International Symposium on Chinese Spoken Language Processing. 309\u2013313","author":"Zhu M.","key":"e_1_3_2_1_62_1","unstructured":"M. Zhu, X. Xie, L. Zhang, and J. Wang. 2018. Automatic personality perception from speech in mandarin. In Proceedings of the International Symposium on Chinese Spoken Language Processing. 309\u2013313."}],"event":{"name":"ICMI '24: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","acronym":"ICMI '24","location":"San Jose Costa Rica"},"container-title":["International Conference on Multimodel Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678957.3685740","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3678957.3685740","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:12Z","timestamp":1750295412000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678957.3685740"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"references-count":62,"alternative-id":["10.1145\/3678957.3685740","10.1145\/3678957"],"URL":"https:\/\/doi.org\/10.1145\/3678957.3685740","relation":{},"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"2024-11-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}