{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T08:12:15Z","timestamp":1773389535376,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T00:00:00Z","timestamp":1720396800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","doi-asserted-by":"publisher","award":["PZ00P2_216294"],"award-info":[{"award-number":["PZ00P2_216294"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,8]]},"DOI":"10.1145\/3640794.3665541","type":"proceedings-article","created":{"date-parts":[[2024,7,7]],"date-time":"2024-07-07T06:24:56Z","timestamp":1720333496000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Multimodal Dialog Act Classification for Digital Character Conversations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9782-8118","authenticated-orcid":false,"given":"Philine","family":"Witzig","sequence":"first","affiliation":[{"name":"Department of Computer Science, ETH Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3458-5663","authenticated-orcid":false,"given":"Rares","family":"Constantin","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1337-7670","authenticated-orcid":false,"given":"Nikola","family":"Kovacevic","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0158-1305","authenticated-orcid":false,"given":"Rafael","family":"Wampfler","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Zurich, Switzerland"}]}],"member":"320","published-online":{"date-parts":[[2024,7,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC\u201916)","author":"Amanova Dilafruz","year":"2016","unstructured":"Dilafruz Amanova, Volha Petukhova, and Dietrich Klakow. 2016. Creating Annotated Dialogue Resources: Cross-domain Dialogue Act Classification. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC\u201916), Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Sara Goggi, Marko Grobelnik, Bente Maegaard, Joseph Mariani, Helene Mazo, Asuncion Moreno, Jan Odijk, and Stelios Piperidis (Eds.). European Language Resources Association (ELRA), Portoro\u017e, Slovenia, 111\u2013117. https:\/\/aclanthology.org\/L16-1017"},{"key":"e_1_3_2_1_2_1","volume-title":"Interacting with embodied conversational agents. Speech technology: Theory and applications 1","author":"Andr\u00e9 Elisabeth","year":"2010","unstructured":"Elisabeth Andr\u00e9 and Catherine Pelachaud. 2010. Interacting with embodied conversational agents. Speech technology: Theory and applications 1 (2010), 123\u2013149."},{"key":"e_1_3_2_1_3_1","unstructured":"Ltd AudioStack Aflorithmic\u00a0Labs. 2023. AudioStack - AI Audio Production. https:\/\/aflorithmic.ai [Accessed: (02.10.2023)]."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15892-6_21"},{"key":"e_1_3_2_1_5_1","volume-title":"Social dialongue with embodied conversational agents. Advances in natural multimodal dialogue systems 1","author":"Bickmore Timothy","year":"2005","unstructured":"Timothy Bickmore and Justine Cassell. 2005. Social dialongue with embodied conversational agents. Advances in natural multimodal dialogue systems 1 (2005), 23\u201354."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC","author":"Bothe Chandrakant","year":"2018","unstructured":"Chandrakant Bothe, Cornelius Weber, Sven Magg, and Stefan Wermter. 2018. A Context-based Approach for Dialogue Act Recognition using Simple Recurrent Neural Networks. In Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018), Nicoletta Calzolari, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Koiti Hasida, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, H\u00e9l\u00e8ne Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis, and Takenobu Tokunaga (Eds.). European Language Resources Association (ELRA), Miyazaki, Japan. https:\/\/aclanthology.org\/L18-1307"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-560"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052974"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACII52823.2021.9597428"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/302979.303150"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747490"},{"key":"e_1_3_2_1_12_1","unstructured":"Maximillian Chen Alexandros Papangelis Chenyang Tao Andy Rosenbaum Seokhwan Kim Yang Liu Zhou Yu and Dilek Hakkani-Tur. 2022. Weakly Supervised Data Augmentation Through Prompting for Dialogue Understanding. arxiv:2210.14169\u00a0[cs.CL]"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3209997"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6259"},{"key":"e_1_3_2_1_16_1","volume-title":"The Geneva affective picture database (GAPED): a new 730-picture database focusing on valence and normative significance. Behavior research methods 43","author":"Dan-Glauser S.","year":"2011","unstructured":"Elise\u00a0S. Dan-Glauser and Klaus\u00a0R. Scherer. 2011. The Geneva affective picture database (GAPED): a new 730-picture database focusing on valence and normative significance. Behavior research methods 43 (2011), 468\u2013477."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_1_18_1","volume-title":"Facial expressions and speech acts: experimental evidences on the role of the upper face as an illocutionary force indicating device in language comprehension. Cognitive processing 18","author":"Domaneschi Filippo","year":"2017","unstructured":"Filippo Domaneschi, Marcello Passarelli, and Carlo Chiorri. 2017. Facial expressions and speech acts: experimental evidences on the role of the upper face as an illocutionary force indicating device in language comprehension. Cognitive processing 18 (2017), 285\u2013306."},{"key":"e_1_3_2_1_19_1","unstructured":"Epic Games. 2023. Live Link Face. Mobile Application. https:\/\/dev.epicgames.com\/community\/learning\/tutorials\/lEYe\/unreal-engine-facial-capture-with-live-link"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Spandana Gella Aishwarya Padmakumar Patrick Lange and Dilek Hakkani-Tur. 2022. Dialog Acts for Task Driven Embodied Agents. In Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue Oliver Lemon Dilek Hakkani-Tur Junyi\u00a0Jessy Li Arash Ashrafzadeh Daniel\u00a0Hern\u00e1ndez Garcia Malihe Alikhani David Vandyke and Ond\u0159ej Du\u0161ek (Eds.). Association for Computational Linguistics Edinburgh UK 111\u2013123. https:\/\/doi.org\/10.18653\/v1\/2022.sigdial-1.13","DOI":"10.18653\/v1\/2022.sigdial-1.13"},{"key":"e_1_3_2_1_21_1","volume-title":"Discourse analysis. Sublanguage: Studies of language in restricted semantic domains 1","author":"Grosz J","year":"1982","unstructured":"Barbara\u00a0J Grosz. 1982. Discourse analysis. Sublanguage: Studies of language in restricted semantic domains 1 (1982), 138\u2013174."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-57351-9_30"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.185"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.pragma.2005.07.005"},{"key":"e_1_3_2_1_27_1","volume-title":"Discourse Relations and Discourse Markers","author":"Jurafsky Daniel","unstructured":"Daniel Jurafsky, Elizabeth Shriberg, Barbara Fox, and Traci Curl. 1998. Lexical, Prosodic, and Syntactic Cues for Dialog Acts. In Discourse Relations and Discourse Markers. The Association for Computational Linguistics, Montreal, Quebec, Canada. https:\/\/aclanthology.org\/W98-0319"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Harshit Kumar Arvind Agarwal Riddhiman Dasgupta and Sachindra Joshi. 2018. Dialogue act sequence labeling using hierarchical encoder with CRF. In Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence (New Orleans Louisiana USA) (AAAI\u201918\/IAAI\u201918\/EAAI\u201918 Vol.\u00a032). AAAI Press New Orleans 8\u00a0pages.","DOI":"10.1609\/aaai.v32i1.11701"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1205"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095751"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), Greg Kondrak and Taro Watanabe (Eds.). Asian Federation of Natural Language Processing","author":"Li Yanran","year":"2017","unstructured":"Yanran Li, Hui Su, Xiaoyu Shen, Wenjie Li, Ziqiang Cao, and Shuzi Niu. 2017. DailyDialog: A Manually Labelled Multi-turn Dialogue Dataset. In Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), Greg Kondrak and Taro Watanabe (Eds.). Asian Federation of Natural Language Processing, Taipei, Taiwan, 986\u2013995. https:\/\/aclanthology.org\/I17-1099"},{"key":"e_1_3_2_1_32_1","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. arxiv:1907.11692\u00a0[cs.CL]"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461371"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6391"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i15.17616"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/3618408.3619590"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1373"},{"key":"e_1_3_2_1_38_1","unstructured":"Eug\u00e9nio Ribeiro Ricardo Ribeiro and David\u00a0Martins de Matos. 2017. The Influence of Context on Dialogue Act Recognition. arxiv:1506.00839\u00a0[cs.CL]"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAU.1969.1162058"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-019-09704-5"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139173438"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","unstructured":"Guokan Shang Antoine Tixier Michalis Vazirgiannis and Jean-Pierre Lorr\u00e9. 2020. Speaker-change Aware CRF for Dialogue Act Classification. In Proceedings of the 28th International Conference on Computational Linguistics Donia Scott Nuria Bel and Chengqing Zong (Eds.). International Committee on Computational Linguistics Barcelona Spain (Online) 450\u2013464. https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.40","DOI":"10.18653\/v1\/2020.coling-main.40"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3026823"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2011-119"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1162\/089120100561737"},{"key":"e_1_3_2_1_46_1","volume-title":"Dialogue act modeling for automatic tagging and recognition of conversational speech. Computational linguistics 26, 3","author":"Stolcke Andreas","year":"2000","unstructured":"Andreas Stolcke, Klaus Ries, Noah Coccaro, Elizabeth Shriberg, Rebecca Bates, Daniel Jurafsky, Paul Taylor, Rachel Martin, Carol\u00a0Van Ess-Dykema, and Marie Meteer. 2000. Dialogue act modeling for automatic tagging and recognition of conversational speech. Computational linguistics 26, 3 (2000), 339\u2013373."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2006-535"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the ACL\u201999 Workshop Towards Standards and Tools for Discourse Tagging. Association for Computational Linguistics","author":"R.","unstructured":"David\u00a0R. Traum and Christine\u00a0H. Nakatani. 1999. A Two-level Approach to Coding Dialogue for Discourse Structure: Activities of the 1998 DRI Working Group on Higher-level Structures. In Proceedings of the ACL\u201999 Workshop Towards Standards and Tools for Discourse Tagging. Association for Computational Linguistics, College Park, Maryland, USA, 101\u2013108. https:\/\/aclanthology.org\/W99-0313"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2001.990517"},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings of the European conference on computer vision (ECCV) workshops. Springer Cham","author":"Wang Xintao","year":"2018","unstructured":"Xintao Wang, Ke Yu, Shixiang Wu, Jinjin Gu, Yihao Liu, Chao Dong, Yu Qiao, and Chen Change\u00a0Loy. 2018. ESRGAN: Enhanced super-resolution generative adversarial networks. In Proceedings of the European conference on computer vision (ECCV) workshops. Springer Cham, Munich, Germany."},{"key":"e_1_3_2_1_51_1","unstructured":"Yuqiao Wen Guoqing Luo and Lili Mou. 2022. An Empirical Study on the Overlapping Problem of Open-Domain Dialogue Datasets. In Proceedings of the Thirteenth Language Resources and Evaluation Conference Nicoletta Calzolari Fr\u00e9d\u00e9ric B\u00e9chet Philippe Blache Khalid Choukri Christopher Cieri Thierry Declerck Sara Goggi Hitoshi Isahara Bente Maegaard Joseph Mariani H\u00e9l\u00e8ne Mazo Jan Odijk and Stelios Piperidis (Eds.). European Language Resources Association Marseille France 146\u2013153. https:\/\/aclanthology.org\/2022.lrec-1.16"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.286"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413690"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.2935783"},{"key":"e_1_3_2_1_55_1","unstructured":"Weixiang Zhao Yanyan Zhao Xin Lu Shilong Wang Yanpeng Tong and Bing Qin. 2023. Is ChatGPT Equipped with Emotional Dialogue Capabilities?arxiv:2304.09582\u00a0[cs.CL]"}],"event":{"name":"CUI '24: ACM Conversational User Interfaces 2024","location":"Luxembourg Luxembourg","acronym":"CUI '24","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["ACM Conversational User Interfaces 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640794.3665541","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640794.3665541","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T18:05:44Z","timestamp":1755885944000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640794.3665541"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,8]]},"references-count":55,"alternative-id":["10.1145\/3640794.3665541","10.1145\/3640794"],"URL":"https:\/\/doi.org\/10.1145\/3640794.3665541","relation":{},"subject":[],"published":{"date-parts":[[2024,7,8]]},"assertion":[{"value":"2024-07-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}