{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T06:46:47Z","timestamp":1782370007706,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":112,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T00:00:00Z","timestamp":1776038400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/legalcode"}],"funder":[{"name":"The Research Council of Norway","award":["326907"],"award-info":[{"award-number":["326907"]}]},{"name":"The Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) - SPP 2199 - in the Project TransforM","award":["425412993"],"award-info":[{"award-number":["425412993"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3772318.3791747","type":"proceedings-article","created":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T04:12:21Z","timestamp":1776053541000},"page":"1-26","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Beyond Words: Measuring User Experience through Speech Analysis in Voice User Interfaces"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8398-4118","authenticated-orcid":false,"given":"Yong","family":"Ma","sequence":"first","affiliation":[{"name":"University of Bergen, Bergen, Norway"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0722-4632","authenticated-orcid":false,"given":"Xuesong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6361-1717","authenticated-orcid":false,"given":"Xuedong","family":"Zhang","sequence":"additional","affiliation":[{"name":"LMU Munich, Munich, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1565-5494","authenticated-orcid":false,"given":"Natalia","family":"Bart\u0142omiejczyk","sequence":"additional","affiliation":[{"name":"University of Neuch\u00e2tel, Neuch\u00e2tel, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3968-7298","authenticated-orcid":false,"given":"Seungwoo","family":"Je","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology, Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7946-1552","authenticated-orcid":false,"given":"Adrian","family":"Holzer","sequence":"additional","affiliation":[{"name":"University of Neuch\u00e2tel, Neuch\u00e2tel, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9562-5147","authenticated-orcid":false,"given":"Morten","family":"Fjeld","sequence":"additional","affiliation":[{"name":"MediaFutures, t2i Lab, University of Bergen, Bergen, Norway and t2i lab, CSE, Chalmers University of Technology, Gothenburg, Sweden"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9007-9888","authenticated-orcid":false,"given":"Andreas","family":"Martin Butz","sequence":"additional","affiliation":[{"name":"LMU Munich, Munich, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,4,13]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"Abeer Alabbas and Khalid Alomar. 2025. A Weighted Composite Metric for Evaluating User Experience in Educational Chatbots: Balancing Usability Engagement and Effectiveness. Future Internet 17 2 (2025) 64.","DOI":"10.3390\/fi17020064"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3242671.3242712"},{"key":"e_1_3_3_2_4_2","first-page":"020019","volume-title":"AIP Conference Proceedings","volume":"3279","author":"Anandhan Velusamy","year":"2025","unstructured":"Velusamy Anandhan, Akilandeshwari Jeyapal, Sri\u00a0Sathriyan Madhusamy, Srinath Nagaraj, and Harish\u00a0Bharathi Mahadevan. 2025. Enhancing user experience in intelligent voice interfaces: Challenges, architectural elements, and future directions. In AIP Conference Proceedings , Vol.\u00a03279. AIP Publishing LLC, 020019."},{"key":"e_1_3_3_2_5_2","first-page":"2037","volume-title":"INTERSPEECH","author":"Ang Jeremy","year":"2002","unstructured":"Jeremy Ang, Rajdip Dhillon, Ashley Krupski, Elizabeth Shriberg, and Andreas Stolcke. 2002. Prosody-based automatic detection of annoyance and frustration in human-computer dialog. In INTERSPEECH. Denver, CO, 2037\u20132040."},{"key":"e_1_3_3_2_6_2","unstructured":"Alexei Baevski Yuhao Zhou Abdelrahman Mohamed and Michael Auli. 2020. wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33 (2020) 12449\u201312460."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33197-8_13"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3308561.3353797"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Virginia Braun and Victoria Clarke. 2006. Using thematic analysis in psychology. Qualitative research in psychology 3 2 (2006) 77\u2013101.","DOI":"10.1191\/1478088706qp063oa"},{"key":"e_1_3_3_2_10_2","first-page":"276","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics: Industry Track","author":"Caralt Mireia\u00a0Hernandez","year":"2025","unstructured":"Mireia\u00a0Hernandez Caralt, Ivan Sekuli\u0107, Filip Carevic, Nghia Khau, Diana\u00a0Nicoleta Popa, Bruna Guedes, Victor Guimar\u00e3es, Zeyu Yang, Andre Manso, Meghana Reddy, et\u00a0al. 2025. \u201cStupid robot, I want to speak to a human!\u201d User Frustration Detection in Task-Oriented Dialog Systems. In Proceedings of the 31st International Conference on Computational Linguistics: Industry Track. 276\u2013285."},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Ti-Chung Cheng Tiffany\u00a0Wenting Li Yi-Hung Chou Karrie Karahalios and Hari Sundaram. 2021. \" I can show what I really like.\" Eliciting Preferences via Quadratic Voting. Proceedings of the ACM on human-computer interaction 5 CSCW1 (2021) 1\u201343.","DOI":"10.1145\/3449281"},{"key":"e_1_3_3_2_12_2","unstructured":"Mauro Cherubini and Nuria Oliver. 2009. A refined experience sampling method to capture mobile user experience. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/0906.4125 (2009)."},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Andy Cockburn Philip Quinn and Carl Gutwin. 2017. The effects of interaction sequencing on user experience and preference. International Journal of Human-Computer Studies 108 (2017) 89\u2013104.","DOI":"10.1016\/j.ijhcs.2017.07.005"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CogInfoCom.2014.7020425"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Jared\u00a0R Curhan and Alex Pentland. 2007. Thin slices of negotiation: predicting outcomes from conversational dynamics within the first 5 minutes. Journal of Applied Psychology 92 3 (2007) 802.","DOI":"10.1037\/0021-9010.92.3.802"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Kimberly\u00a0L Dahl and Cara\u00a0E Stepp. 2021. Changes in relative fundamental frequency under increased cognitive load in individuals with healthy voices. Journal of Speech Language and Hearing Research 64 4 (2021) 1189\u20131196.","DOI":"10.1044\/2021_JSLHR-20-00134"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-05544-7_20"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Akshay\u00a0Madhav Deshmukh and Ricardo Chalmeta. 2024. User experience and usability of voice user interfaces: A systematic literature review. Information 15 9 (2024) 579.","DOI":"10.3390\/info15090579"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Parashar Dhakal Praveen Damacharla Ahmad\u00a0Y Javaid and Vijay Devabhaktuni. 2019. A near real-time automatic speaker recognition architecture for voice-based user interface. Machine learning and knowledge extraction 1 1 (2019) 504\u2013520.","DOI":"10.3390\/make1010031"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Alexis Dinno. 2015. Nonparametric pairwise multiple comparisons in independent groups using Dunn\u2019s test. The Stata Journal 15 1 (2015) 292\u2013300.","DOI":"10.1177\/1536867X1501500117"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3715668.3736363"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Olive\u00a0Jean Dunn. 1964. Multiple comparisons using rank sums. Technometrics 6 3 (1964) 241\u2013252.","DOI":"10.1080\/00401706.1964.10490181"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"Faruk Lawal\u00a0Ibrahim Dutsinma Debajyoti Pal Suree Funilkul and Jonathan\u00a0H Chan. 2022. A systematic review of voice assistant usability: An ISO 9241\u201311 approach. SN computer science 3 4 (2022) 267.","DOI":"10.1007\/s42979-022-01172-3"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Sidney D\u2019Mello Arvid Kappas and Jonathan Gratch. 2018. The affective computing approach to affect measurement. Emotion Review 10 2 (2018) 174\u2013183.","DOI":"10.1177\/1754073917696583"},{"key":"e_1_3_3_2_25_2","unstructured":"Olga Egorow Ingo Siegert and Andreas Wendemuth. 2017. Prediction of user satisfaction in naturalistic human-computer interaction. (2017)."},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Moataz El\u00a0Ayadi Mohamed\u00a0S Kamel and Fakhri Karray. 2011. Survey on speech emotion recognition: Features classification schemes and databases. Pattern recognition 44 3 (2011) 572\u2013587.","DOI":"10.1016\/j.patcog.2010.09.020"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Aaron\u00a0C Elkins and Douglas\u00a0C Derrick. 2013. The sound of trust: voice as a measurement of trust during interactions with embodied conversational agents. Group decision and negotiation 22 5 (2013) 897\u2013913.","DOI":"10.1007\/s10726-012-9339-x"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445680"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Lawal Ibrahim\u00a0Dutsinma Faruk Mohammad\u00a0Dawood Babakerkhell Pornchai Mongkolnam Vithida Chongsuphajaisiddhi Suree Funilkul and Debajyoti Pal. 2024. A review of subjective scales measuring the user experience of voice assistants. IEEE Access 12 (2024) 14893\u201314917.","DOI":"10.1109\/ACCESS.2024.3358423"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300374"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3409120.3410651"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3409120.3410651"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3593434.3593452"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN46459.2019.8956462"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517684"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"crossref","unstructured":"Andreas Hinderks Martin Schrepp Francisco Jos\u00e9\u00a0Dom\u00ednguez Mayo Mar\u00eda\u00a0Jos\u00e9 Escalona and J\u00f6rg Thomaschewski. 2019. Developing a UX KPI based on the user experience questionnaire. Computer Standards & Interfaces 65 (2019) 38\u201344.","DOI":"10.1016\/j.csi.2019.01.007"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Gahangir Hossain. 2017. Rethinking self-reported measure in subjective evaluation of assistive technology. Human-centric Computing and Information Sciences 7 1 (2017) 23.","DOI":"10.1186\/s13673-017-0104-7"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Wei-Ning Hsu Benjamin Bolte Yao-Hung\u00a0Hubert Tsai Kushal Lakhotia Ruslan Salakhutdinov and Abdelrahman Mohamed. 2021. Hubert: Self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM transactions on audio speech and language processing 29 (2021) 3451\u20133460.","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517525"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"crossref","unstructured":"Adriana\u00a0Lorena Iniguez-Carrillo Laura\u00a0Sanely Gaytan-Lugo Miguel\u00a0Angel Garcia-Ruiz and Rocio Maciel-Arellano. 2021. Usability questionnaires to evaluate voice user interfaces. IEEE Latin America Transactions 19 9 (2021) 1468\u20131477.","DOI":"10.1109\/TLA.2021.9468439"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Yannick Jadoul Bill Thompson and Bart De\u00a0Boer. 2018. Introducing parselmouth: A python interface to praat. Journal of Phonetics 71 (2018) 1\u201315.","DOI":"10.1016\/j.wocn.2018.07.001"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/TAEECE.2013.6557272"},{"key":"e_1_3_3_2_44_2","first-page":"476","volume-title":"International Conference on Social Robotics","author":"Janssens Ruben","year":"2024","unstructured":"Ruben Janssens, Eva Verhelst, Giulio\u00a0Antonio Abbo, Qiaoqiao Ren, Maria Jose\u00a0Pinto Bernal, and Tony Belpaeme. 2024. Child speech recognition in human-robot interaction: Problem solved?. In International Conference on Social Robotics. Springer, 476\u2013486."},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"crossref","unstructured":"Laurent Karsenty and Val\u00e9rie Botherel. 2005. Transparency strategies to help users handle system errors. Speech Communication 45 3 (2005) 305\u2013324.","DOI":"10.1016\/j.specom.2004.10.018"},{"key":"e_1_3_3_2_46_2","first-page":"4990","volume-title":"INTERSPEECH","author":"Kirkland Ambika","year":"2022","unstructured":"Ambika Kirkland, Harm Lameris, Eva Sz\u00e9kely, and Joakim Gustafson. 2022. Where\u2019s the uh, hesitation? The interplay between filled pause location, speech rate and fundamental frequency in perception of confidence. In INTERSPEECH. 4990\u20134994."},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3405755.3406119"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"crossref","unstructured":"Indra Kishor Udit Mamodiya Sumit Saini and Badre Bossoufi. 2025. Voice-enabled human-robot interaction: adaptive self-learning systems for enhanced collaboration. Robotica (2025) 1\u201329.","DOI":"10.1017\/S0263574725000438"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"crossref","unstructured":"Andreas\u00a0M Klein Jana Deutschl\u00e4nder Kristina K\u00f6lln Maria Rauschenberger and Maria\u00a0Jos\u00e9 Escalona. 2024. Exploring the context of use for voice user interfaces: Toward context-dependent user experience quality testing. Journal of Software: Evolution and Process 36 7 (2024) e2618.","DOI":"10.1002\/smr.2618"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3404983.3410003"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-35921-7_12"},{"key":"e_1_3_3_2_52_2","unstructured":"ANDREAS\u00a0M Klein and MARIA Rauschenberger. 2024. Flexible User Experience Evaluation for Voice User Interfaces."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"crossref","unstructured":"Jennifer Kleinow and Anne Smith. 2006. Potential interactions among linguistic autonomic and motor factors in speech. Developmental Psychobiology: The Journal of the International Society for Developmental Psychobiology 48 4 (2006) 275\u2013287.","DOI":"10.1002\/dev.20141"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"crossref","unstructured":"Ahmet\u00a0Baki Kocaballi Liliana Laranjo and Enrico Coiera. 2019. Understanding and measuring user experience in conversational interfaces. Interacting with Computers 31 2 (2019) 192\u2013207.","DOI":"10.1093\/iwc\/iwz015"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"crossref","unstructured":"Jessica Kollmorgen Andreas Hinderks and J\u00f6rg Thomaschewski. 2024. Selecting the appropriate user experience questionnaire and guidance for interpretation: the UEQ family. (2024).","DOI":"10.9781\/ijimai.2024.08.005"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"crossref","unstructured":"Busra\u00a0Ozdenizci Kose. 2025. Beyond the Screen: Cross-Platform UX for Wearables IoT and Emerging Technologies. Navigating Usability and User Experience in a Multi-Platform World (2025) 1\u201318.","DOI":"10.4018\/979-8-3693-2337-3.ch001"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"crossref","unstructured":"William\u00a0H Kruskal and W\u00a0Allen Wallis. 1952. Use of ranks in one-criterion variance analysis. Journal of the American statistical Association 47 260 (1952) 583\u2013621.","DOI":"10.1080\/01621459.1952.10483441"},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"crossref","unstructured":"Sari Kujala Tanja Walsh Piia Nurkka and Marian Crisan. 2014. Sentence completion for understanding users and evaluating user experience. Interacting with Computers 26 3 (2014) 238\u2013255.","DOI":"10.1093\/iwc\/iwt036"},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"crossref","unstructured":"Lisa\u00a0R LaSalle and Edward\u00a0G Conture. 1995. Disfluency clusters of children who stutter: Relation of stutterings to self-repairs. Journal of Speech Language and Hearing Research 38 5 (1995) 965\u2013977.","DOI":"10.1044\/jshr.3805.965"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"crossref","unstructured":"Phu\u00a0Ngoc Le Eliathamby Ambikairajah Julien Epps Vidhyasaharan Sethu and Eric\u00a0HC Choi. 2011. Investigation of spectral centroid features for cognitive load classification. Speech Communication 53 4 (2011) 540\u2013551.","DOI":"10.1016\/j.specom.2011.01.005"},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"crossref","unstructured":"Ka\u00a0Yi Lee Charlotte Cheuk\u00a0Kwan Chan Ching Yip Joyce Tin\u00a0Wing Li Cheuk\u00a0Fung Hau Sarah Suen\u00a0Yue Poon Hui\u00a0Min Chen Kar\u00a0Yan Li Michael\u00a0Francis Burrow Gloria Hoi\u00a0Yan Wong et\u00a0al. 2024. Association between tooth loss-related speech and psychosocial impairment with cognitive function: A pilot study in Hong Kong\u2019s older population. Journal of oral rehabilitation 51 8 (2024) 1475\u20131485.","DOI":"10.1111\/joor.13718"},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713090"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"crossref","unstructured":"Robin\u00a0J Lickley. 2015. Fluency and disfluency. The handbook of speech production (2015) 445\u2013474.","DOI":"10.1002\/9781118584156.ch20"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3357236.3395488"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706599.3719734"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-92977-9_19"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"crossref","unstructured":"Megan\u00a0K MacPherson. 2019. Cognitive load affects speech motor performance differently in older and younger adults. Journal of Speech Language and Hearing Research 62 5 (2019) 1258\u20131277.","DOI":"10.1044\/2018_JSLHR-S-17-0222"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"crossref","unstructured":"Constantina Maltezou-Papastylianou Reinhold Scherer and Silke Paulmann. 2025. How do voice acoustics affect the perceived trustworthiness of a speaker? A systematic review. Frontiers in Psychology 16 (2025) 1495456.","DOI":"10.3389\/fpsyg.2025.1495456"},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376479"},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"crossref","unstructured":"Brian McFee Colin Raffel Dawen Liang Daniel\u00a0PW Ellis Matt McVicar Eric Battenberg and Oriol Nieto. 2015. librosa: Audio and music signal analysis in python. SciPy 2015 (2015) 18\u201324.","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"e_1_3_3_2_71_2","doi-asserted-by":"publisher","DOI":"10.1145\/2800835.2804397"},{"key":"e_1_3_3_2_72_2","doi-asserted-by":"crossref","unstructured":"Anna-Lena Meiners Martin Schrepp Andreas Hinderks and J\u00f6rg Thomaschewski. 2024. A benchmark for the UEQ+ framework: construction of a simple tool to quickly interpret UEQ+ KPIs. (2024).","DOI":"10.9781\/ijimai.2023.05.003"},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","DOI":"10.1145\/3630106.3658993"},{"key":"e_1_3_3_2_74_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-62302-9_4"},{"key":"e_1_3_3_2_75_2","doi-asserted-by":"publisher","DOI":"10.1145\/3571884.3597129"},{"key":"e_1_3_3_2_76_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173580"},{"key":"e_1_3_3_2_77_2","unstructured":"Syed Meesam\u00a0Raza Naqvi Muhammad\u00a0Ali Tahir Kamran Javed Hassan\u00a0Aqeel Khan Ali Raza and Zubair Saeed. 2024. Code-mixed street address recognition and accent adaptation for voice-activated navigation services. IEEE Access (2024)."},{"key":"e_1_3_3_2_78_2","unstructured":"Hien\u00a0Trang Nguyen. 2024. Enhancing Error Handling User Experience With Voice User Interface. Master\u2019s thesis. New York University Tandon School of Engineering."},{"key":"e_1_3_3_2_79_2","doi-asserted-by":"crossref","unstructured":"Andreea Niculescu Betsy Van\u00a0Dijk Anton Nijholt Haizhou Li and Swee\u00a0Lan See. 2013. Making social robots more attractive: the effects of voice pitch humor and empathy. International journal of social robotics 5 2 (2013) 171\u2013191.","DOI":"10.1007\/s12369-012-0171-x"},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCNT45670.2019.8944754"},{"key":"e_1_3_3_2_81_2","unstructured":"Matthias Paulik Matt Seigel Henry Mason Dominic Telaar Joris Kluivers Rogier Van\u00a0Dalen Chi\u00a0Wai Lau Luke Carlson Filip Granqvist Chris Vandevelde et\u00a0al. 2021. Federated evaluation and tuning for on-device personalization: System design & applications. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2102.08503 (2021)."},{"key":"e_1_3_3_2_82_2","volume-title":"Designing voice user interfaces: Principles of conversational experiences","author":"Pearl Cathy","year":"2016","unstructured":"Cathy Pearl. 2016. Designing voice user interfaces: Principles of conversational experiences. \" O\u2019Reilly Media, Inc.\"."},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","DOI":"10.1145\/3334480.3382792"},{"key":"e_1_3_3_2_84_2","doi-asserted-by":"crossref","unstructured":"Alex Pentland. 2007. Social signal processing [exploratory DSP]. IEEE Signal Processing Magazine 24 4 (2007) 108\u2013111.","DOI":"10.1109\/MSP.2007.4286569"},{"key":"e_1_3_3_2_85_2","volume-title":"Honest signals: how they shape our world","author":"Pentland Alex","year":"2010","unstructured":"Alex Pentland. 2010. Honest signals: how they shape our world. MIT press."},{"key":"e_1_3_3_2_86_2","doi-asserted-by":"publisher","DOI":"10.1145\/3570211.3570215"},{"key":"e_1_3_3_2_87_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640794.3665545"},{"key":"e_1_3_3_2_88_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174214"},{"key":"e_1_3_3_2_89_2","doi-asserted-by":"publisher","DOI":"10.1109\/SoutheastCon56624.2025.10971452"},{"key":"e_1_3_3_2_90_2","doi-asserted-by":"crossref","unstructured":"Alice Richardson. 2010. Nonparametric statistics for non-statisticians: A step-by-step approach by Gregory W. Corder Dale I. Foreman.","DOI":"10.1111\/j.1751-5823.2010.00122_6.x"},{"key":"e_1_3_3_2_91_2","doi-asserted-by":"crossref","unstructured":"Harry\u00a0B Santoso Martin Schrepp Lintang\u00a0M Hasani Rian Fitriansyah and Arief Setyanto. 2022. The use of User Experience Questionnaire Plus (UEQ+) for cross-cultural UX research: evaluating Zoom and Learn Quran Tajwid as online learning tools. Heliyon 8 11 (2022).","DOI":"10.1016\/j.heliyon.2022.e11748"},{"key":"e_1_3_3_2_92_2","doi-asserted-by":"crossref","unstructured":"Klaus\u00a0R Scherer Harvey London and Jared\u00a0J Wolf. 1973. The voice of confidence: Paralinguistic cues and audience evaluation. Journal of Research in Personality 7 1 (1973) 31\u201344.","DOI":"10.1016\/0092-6566(73)90030-5"},{"key":"e_1_3_3_2_93_2","doi-asserted-by":"crossref","unstructured":"Martin Schrepp Andreas Hinderks and Jorg Thomaschewski. 2017. Construction of a benchmark for the user experience questionnaire (UEQ). International Journal of Interactive Multimedia and Artificial Intelligence 4 4 (2017) 40\u201344.","DOI":"10.9781\/ijimai.2017.445"},{"key":"e_1_3_3_2_94_2","first-page":"10","volume-title":"Mensch und Computer 2021-Workshopband","author":"Schrepp Martin","year":"2021","unstructured":"Martin Schrepp, Heike Sandk\u00fchler, and J\u00f6rg Thomaschewski. 2021. How to create short forms of UEQ+ based questionnaires?. In Mensch und Computer 2021-Workshopband. Gesellschaft f\u00fcr Informatik eV, 10\u201318420."},{"key":"e_1_3_3_2_95_2","doi-asserted-by":"crossref","unstructured":"Martin Schrepp and Jorg Thomaschewski. 2019. Design and validation of a framework for the creation of user experience questionnaires. IJIMAI 5 7 (2019) 88\u201395.","DOI":"10.9781\/ijimai.2019.06.006"},{"key":"e_1_3_3_2_96_2","first-page":"1","volume-title":"Mensch & Computer","author":"Schrepp Martin","year":"2019","unstructured":"Martin Schrepp and J\u00f6rg Thomaschewski. 2019. Handbook for the modular extension of the User Experience Questionnaire. In Mensch & Computer. 1\u201319."},{"key":"e_1_3_3_2_97_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411763.3451712"},{"key":"e_1_3_3_2_98_2","unstructured":"Urmila Shrawankar and Vilas\u00a0M Thakare. 2013. Techniques for feature extraction in speech recognition system: A comparative study. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1305.1145 (2013)."},{"key":"e_1_3_3_2_99_2","doi-asserted-by":"crossref","unstructured":"Pascal Snow Alejandra Ruiz-Segura Pierre-Majorique L\u00e9ger Sylvain S\u00e9n\u00e9cal Constantinos Coursaris Romain Pourchon Sarah Cosby and Ariane Beauchesne. 2025. Breaking the bias: integrating physiological and self-reported data to improve UX researchers\u2019 accuracy and empathy. Computers in Human Behavior Reports 19 (2025) 100723.","DOI":"10.1016\/j.chbr.2025.100723"},{"key":"e_1_3_3_2_100_2","first-page":"1098","volume-title":"Proceedings of the Human Factors and Ergonomics Society Annual Meeting","volume":"68","author":"Sogemeier Denise","year":"2024","unstructured":"Denise Sogemeier, Yannick Forster, Frederik Naujoks, Josef\u00a0F Krems, and Andreas Keinath. 2024. The Importance of Timing\u2014An Expert Evaluation on Latencies for Voice Assistants. In Proceedings of the Human Factors and Ergonomics Society Annual Meeting , Vol.\u00a068. SAGE Publications Sage CA: Los Angeles, CA, 1098\u20131100."},{"key":"e_1_3_3_2_101_2","doi-asserted-by":"publisher","DOI":"10.1145\/3064663.3064770"},{"key":"e_1_3_3_2_102_2","volume-title":"Proc. IEEE Int. Conf. Acoustics Speech Signal Process","author":"Soman Vikrant","year":"2010","unstructured":"Vikrant Soman and Anmol Madan. 2010. Social signaling: Predicting the outcome of job interviews from vocal tone and prosody. In Proc. IEEE Int. Conf. Acoustics Speech Signal Process."},{"key":"e_1_3_3_2_103_2","doi-asserted-by":"crossref","unstructured":"Meishu Song Adria Mallol-Ragolta Emilia Parada-Cabaleiro Zijiang Yang Shuo Liu Zhao Ren Ziping Zhao and Bj\u00f6rn\u00a0W Schuller. 2021. Frustration recognition from speech during game interaction using wide residual networks. Virtual Reality & Intelligent Hardware 3 1 (2021) 76\u201386.","DOI":"10.1016\/j.vrih.2020.10.004"},{"key":"e_1_3_3_2_104_2","doi-asserted-by":"crossref","unstructured":"Danya Swoboda Jared Boasen Pierre-Majorique L\u00e9ger Romain Pourchon and Sylvain S\u00e9n\u00e9cal. 2022. Comparing the effectiveness of speech and physiological features in explaining emotional responses during voice user interface interactions. Applied Sciences 12 3 (2022) 1269.","DOI":"10.3390\/app12031269"},{"key":"e_1_3_3_2_105_2","doi-asserted-by":"crossref","unstructured":"Jo\u00e3o\u00a0Paulo Teixeira Carla Oliveira and Carla Lopes. 2013. Vocal acoustic analysis\u2013jitter shimmer and hnr parameters. Procedia technology 9 (2013) 1112\u20131122.","DOI":"10.1016\/j.protcy.2013.12.124"},{"key":"e_1_3_3_2_106_2","doi-asserted-by":"crossref","unstructured":"Jose Tribolet and Ronald Crochiere. 2003. Frequency domain coding of speech. IEEE Transactions on Acoustics Speech and Signal Processing 27 5 (2003) 512\u2013530.","DOI":"10.1109\/TASSP.1979.1163283"},{"key":"e_1_3_3_2_107_2","doi-asserted-by":"crossref","unstructured":"Martine Van\u00a0Puyvelde Xavier Neyt Francis McGlone and Nathalie Pattyn. 2018. Voice stress analysis: A new framework for voice and effort in human performance. Frontiers in psychology 9 (2018) 1994.","DOI":"10.3389\/fpsyg.2018.01994"},{"key":"e_1_3_3_2_108_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445536"},{"key":"e_1_3_3_2_109_2","doi-asserted-by":"crossref","unstructured":"Willem\u00a0A Wagenaar. 1969. Note on the construction of digram-balanced Latin squares. Psychological Bulletin 72 6 (1969) 384.","DOI":"10.1037\/h0028329"},{"key":"e_1_3_3_2_110_2","first-page":"316","volume-title":"National Conference on Man-Machine Speech Communication","author":"Wang Yihui","year":"2023","unstructured":"Yihui Wang, Haocheng Lu, and Gaowu Wang. 2023. A pilot study on the prosodic factors influencing voice attractiveness of ai speech. In National Conference on Man-Machine Speech Communication. Springer, 316\u2013329."},{"key":"e_1_3_3_2_111_2","doi-asserted-by":"crossref","unstructured":"Xinlei Xu Dongdong Li Yijun Zhou and Zhe Wang. 2022. Multi-type features separating fusion learning for Speech Emotion Recognition. Applied Soft Computing 130 (2022) 109648.","DOI":"10.1016\/j.asoc.2022.109648"},{"key":"e_1_3_3_2_112_2","unstructured":"Chen Yu Paul\u00a0M Aoki and Allison Woodruff. 2004. Detecting user engagement in everyday conversations. arXiv preprint cs\/0410027 (2004)."},{"key":"e_1_3_3_2_113_2","doi-asserted-by":"crossref","unstructured":"Mona Zavichi Andr\u00e9 Santos Catarina Moreira Anderson Maciel and Joaquim Jorge. 2025. Gaze\u2013Hand Steering for Travel and Multitasking in Virtual Environments. Multimodal Technologies and Interaction 9 6 (2025) 61.","DOI":"10.3390\/mti9060061"}],"event":{"name":"CHI 2026: CHI Conference on Human Factors in Computing Systems","location":"Barcelona Spain","acronym":"CHI '26","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2026 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772318.3791747","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T05:59:59Z","timestamp":1782367199000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772318.3791747"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,13]]},"references-count":112,"alternative-id":["10.1145\/3772318.3791747","10.1145\/3772318"],"URL":"https:\/\/doi.org\/10.1145\/3772318.3791747","relation":{},"subject":[],"published":{"date-parts":[[2026,4,13]]},"assertion":[{"value":"2026-04-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}