{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,29]],"date-time":"2025-12-29T19:02:57Z","timestamp":1767034977833,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":77,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"DFG","award":["442607480"],"award-info":[{"award-number":["442607480"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100020959","name":"JST-Mirai Program","doi-asserted-by":"publisher","award":["JPMJCR20G6"],"award-info":[{"award-number":["JPMJCR20G6"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100020959","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,4]]},"DOI":"10.1145\/3678957.3685757","type":"proceedings-article","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T04:35:53Z","timestamp":1730262953000},"page":"224-233","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Multilingual Dyadic Interaction Corpus NoXi+J: Toward Understanding Asian-European Non-verbal Cultural Characteristics and their Influences on Engagement"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3064-599X","authenticated-orcid":false,"given":"Marius","family":"Funk","sequence":"first","affiliation":[{"name":"University of Augsburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9260-0403","authenticated-orcid":false,"given":"Shogo","family":"Okada","sequence":"additional","affiliation":[{"name":"Japan Advanced Institute of Science and Technology, 
Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2367-162X","authenticated-orcid":false,"given":"Elisabeth","family":"Andr\u00e9","sequence":"additional","affiliation":[{"name":"Human-Centered Artificial Intelligence, University of Augsburg, Germany"}]}],"member":"320","published-online":{"date-parts":[[2024,11,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1037\/0022-3514.69.3.518"},{"volume-title":"Nonverbal communication","author":"Andersen A","key":"e_1_3_2_1_2_1","unstructured":"Peter\u00a0A Andersen. 1998. Nonverbal communication. Mayfield Publishing, Maidenhead, England."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1093\/oxfordhb"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO54536.2021.9615999"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2013.48"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Max Bain Jaesung Huh Tengda Han and Andrew Zisserman. 2023. WhisperX: Time-Accurate Speech Transcription of Long-Form Audio. arxiv:2303.00747\u00a0[cs.SD]","DOI":"10.21437\/Interspeech.2023-78"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2764921"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.3115\/1708376.1708411"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2020.09.004"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/2936924.2937059"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136780"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/3545946.3598652"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5898\/JHRI.1.1.Chao"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1515\/mult.2005.24.3.237"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","unstructured":"Soumia Dermouche and Catherine Pelachaud. 2019. 
Engagement Modeling in Dyadic Interaction. 440\u2013445. https:\/\/doi.org\/10.1145\/3340555.3353765","DOI":"10.1145\/3340555.3353765"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3234149"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1037\/0033-2909.115.2.268"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-012-9218-5"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.lingua.2023.103629"},{"key":"e_1_3_2_1_20_1","unstructured":"C.C. Fries. 1952. The Structure of English: An Introduction to the Construction of English Sentences. Harcourt Brace. https:\/\/books.google.co.jp\/books?id=YEw3AAAAIAAJ"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","unstructured":"Nan Gao Mohammad Saiedur\u00a0Rahaman Wei Shao and Flora\u00a0D Salim. 2021. Investigating the Reliability of Self-report Data in the Wild: The Quest for Ground Truth. In Adjunct Proceedings of the 2021 ACM International Joint Conference on Pervasive and Ubiquitous Computing and Proceedings of the 2021 ACM International Symposium on Wearable Computers (Virtual USA) (UbiComp\/ISWC \u201921 Adjunct). Association for Computing Machinery New York NY USA 237\u2013242. https:\/\/doi.org\/10.1145\/3460418.3479338","DOI":"10.1145\/3460418.3479338"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"Xiao Ge Chunchen Xu Daigo Misaki Hazel\u00a0Rose Markus and Jeanne\u00a0L Tsai. 2024. How Culture Shapes What People Want From AI. https:\/\/doi.org\/10.48550\/arXiv.2403.05104","DOI":"10.48550\/arXiv.2403.05104"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2015.7344688"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","unstructured":"Nadine Glas and Catherine Pelachaud. 2015. Definitions of Engagement in Human-Agent Interaction. 944\u2013949. 
https:\/\/doi.org\/10.1109\/ACII.2015.7344688","DOI":"10.1109\/ACII.2015.7344688"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1037\/0022-3514.59.6.1216"},{"key":"e_1_3_2_1_26_1","unstructured":"E.T. Hall. 1976. Beyond Culture. Knopf Doubleday Publishing Group. https:\/\/books.google.co.jp\/books?id=sgiNzwEACAAJ"},{"key":"e_1_3_2_1_27_1","unstructured":"Edward\u00a0Twitchell Hall. 1959. The Silent Language. https:\/\/api.semanticscholar.org\/CorpusID:143072138"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.17077\/etd.p4yv50ow"},{"key":"e_1_3_2_1_29_1","volume-title":"A proof of the conjecture that the Tukey-Kramer multiple comparisons procedure is conservative. The Annals of Statistics","author":"Hayter J","year":"1984","unstructured":"Anthony\u00a0J Hayter. 1984. A proof of the conjecture that the Tukey-Kramer multiple comparisons procedure is conservative. The Annals of Statistics (1984), 61\u201375."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2598510.2598594"},{"volume-title":"Culture\u2019s consequences (2 ed.)","author":"Hofstede Geert","key":"e_1_3_2_1_31_1","unstructured":"Geert Hofstede. 2001. Culture\u2019s consequences (2 ed.). SAGE Publications, Thousand Oaks, CA."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1111\/lnc3.12432"},{"key":"e_1_3_2_1_33_1","volume-title":"Activity Context Representation(AAAI Workshop - Technical Report). 40\u201343. 2012 AAAI Workshop ; Conference date: 23-07-2012 Through 23-07-2012","author":"Yin Hsiao Joey Chiao","year":"2012","unstructured":"Joey Chiao Yin Hsiao, Wan Rong Jih, and Jane Yung Jen Hsu. 2012. Recognizing Continuous Social Engagement Level in Dyadic Conversation by Using Turn-taking and Speech Emotion Patterns. In Activity Context Representation(AAAI Workshop - Technical Report). 40\u201343. 
2012 AAAI Workshop ; Conference date: 23-07-2012 Through 23-07-2012."},{"key":"e_1_3_2_1_34_1","volume-title":"Back-Channeling in Japanese Conversations:Features of Back-Channeling that Create Good Impressions","author":"Ichinohara Kazue","year":"2015","unstructured":"Kazue Ichinohara. 2015. Back-Channeling in Japanese Conversations:Features of Back-Channeling that Create Good Impressions. Tokyo Woman\u2019s Christian University studies in language and culture: Studies in language and culture 23 (03 2015), 1\u201315. https:\/\/cir.nii.ac.jp\/crid\/1050845762588661248"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1527\/tjsai.38-3_J-M91"},{"volume-title":"9th International Workshop on Spoken Dialogue System Technology, Luis\u00a0Fernando D\u2019Haro, Rafael\u00a0E. Banchs","author":"Inoue Koji","key":"e_1_3_2_1_36_1","unstructured":"Koji Inoue, Divesh Lala, Katsuya Takanashi, and Tatsuya Kawahara. 2019. Latent Character Model for Engagement Recognition Based on Multimodal Behaviors. In 9th International Workshop on Spoken Dialogue System Technology, Luis\u00a0Fernando D\u2019Haro, Rafael\u00a0E. Banchs, and Haizhou Li (Eds.). Springer Singapore, Singapore, 119\u2013130."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1200155109"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445449"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.pragma.2007.02.009"},{"key":"e_1_3_2_1_40_1","unstructured":"M.L. Knapp J.A. Hall and T.G. Horgan. 1972. Nonverbal Communication in Human Interaction. Cengage Learning. 
https:\/\/books.google.co.jp\/books?id=rWoWAAAAQBAJ"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2019.2944808"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1016\/0147-1767(78)90029-9"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2015.00731"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1080\/17475750600909253"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.11517\/pjsai.JSAI2020.0_3F5ES204"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Xiguang Li Candy\u00a0Olivia Mawalim and Shogo Okada. 2023. Inter-person Intra-modality Attention Based Model for\u00a0Dyadic Interaction Engagement Prediction. In Social Computing and Social Media Adela Coman and Simona Vasilache (Eds.). Springer Nature Switzerland Cham 91\u2013105.","DOI":"10.1007\/978-3-031-35915-6_8"},{"key":"e_1_3_2_1_47_1","volume-title":"Workshop on Multimodal Corpora at ICMI-MLMI","author":"Lu Jia","year":"2011","unstructured":"Jia Lu, Jens Allwood, and Elisabeth Ahlsen. 2011. A Study on Cultural Variations of Smile Based on Empirical Recordings of Chinese and Swedish First Encounters. Workshop on Multimodal Corpora at ICMI-MLMI, Alicante, Spain (2011)."},{"volume-title":"Nonverbal communication: Science and applications","author":"Matsumoto David","key":"e_1_3_2_1_48_1","unstructured":"David Matsumoto, Mark\u00a0G Frank, and Hyi\u00a0Sung Hwang. 2012. Nonverbal communication: Science and applications. 
Sage Publications."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/bf00987239"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1016\/0378-2166(87)90181-0"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1016\/0378-2166(90)90097-W"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2010.5583006"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613851"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3551589"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/1719970.1719990"},{"key":"e_1_3_2_1_56_1","volume-title":"Human-Computer Interaction \u2013 INTERACT","author":"Newn Joshua","year":"2019","unstructured":"Joshua Newn, Ronal Singh, Fraser Allison, Prashan Madumal, Eduardo Velloso, and Frank Vetere. 2019. Designing Interactions with Intention-Aware Gaze-Enabled Artificial Agents. In Human-Computer Interaction \u2013 INTERACT 2019, David Lamas, Fernando Loizides, Lennart Nacke, Helen Petrie, Marco Winckler, and Panayiotis Zaphiris (Eds.). Springer International Publishing, Cham, 255\u2013281."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2020.00092"},{"key":"e_1_3_2_1_58_1","unstructured":"Daniel Ortega Sarina Meyer Antje Schweitzer and Ngoc\u00a0Thang Vu. 2023. Modeling Speaker-Listener Interaction for Backchannel Prediction. arxiv:2304.04472\u00a0[cs.CL]"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1177\/1609406919899220"},{"key":"e_1_3_2_1_60_1","unstructured":"I. Poggi. 2007. Mind Hands Face and Body: A Goal and Belief View of Multimodal Communication. Weidler. 
https:\/\/books.google.co.jp\/books?id=_xjoOAAACAAJ"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21368"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2013.6553805"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1207\/s15327752jpa6303_8"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1017\/S0047404500001019"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2005.03.005"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2020.101178"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1016\/0169-7439(89)80095-4"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1037\/emo0000459"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2005.05.009"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.2307\/3001913"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2011.27"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502223"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1099-0992(1998110)28:6<879::AID-EJSP901>3.0.CO;2-W"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"crossref","unstructured":"Jinhan Wang Long Chen Aparna Khare Anirudh Raju Pranav Dheram Di He Minhua Wu Andreas Stolcke and Venkatesh Ravichandran. 2024. Turn-taking and Backchannel Prediction with Acoustic and Large Language Model Fusion. arxiv:2401.14717\u00a0[cs.CL]","DOI":"10.1109\/ICASSP48485.2024.10447196"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1017\/S0047404500013270"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1460-2466.1975.tb00582.x"},{"volume-title":"CLS-70","author":"Yngve H.","key":"e_1_3_2_1_77_1","unstructured":"Victor\u00a0H. Yngve. 1970. On getting a word in edgewise. In CLS-70. 
University of Chicago, 567\u2013577."}],"event":{"name":"ICMI '24: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","acronym":"ICMI '24","location":"San Jose Costa Rica"},"container-title":["International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678957.3685757","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3678957.3685757","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:12Z","timestamp":1750295412000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678957.3685757"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"references-count":77,"alternative-id":["10.1145\/3678957.3685757","10.1145\/3678957"],"URL":"https:\/\/doi.org\/10.1145\/3678957.3685757","relation":{},"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"2024-11-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}