{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T15:47:45Z","timestamp":1775144865526,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-2005430"],"award-info":[{"award-number":["IIS-2005430"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,4]]},"DOI":"10.1145\/3678957.3685742","type":"proceedings-article","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T04:35:53Z","timestamp":1730262953000},"page":"57-65","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Online Multimodal End-of-Turn Prediction for Three-party Conversations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9726-1153","authenticated-orcid":false,"given":"Meng-Chen","family":"Lee","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Houston, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0452-8676","authenticated-orcid":false,"given":"Zhigang","family":"Deng","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Houston, United States"}]}],"member":"320","published-online":{"date-parts":[[2024,11,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proc. NAACL workshop on adaptation in dialogue systems. 2\u20138.","author":"Bell Linda","year":"2001","unstructured":"Linda Bell, Johan Boye, and Joakim Gustafson. 2001. Real-time handling of fragmented utterances. In Proc. NAACL workshop on adaptation in dialogue systems. 2\u20138."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2070481.2070507"},{"key":"e_1_3_2_1_3_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems 33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020), 1877\u20131901."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1515\/cog-2016-0119"},{"key":"e_1_3_2_1_5_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung\u00a0Won Chung, Charles Sutton, Sebastian Gehrmann, 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research 24, 240 (2023), 1\u2013113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_6_1","volume-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555","author":"Chung Junyoung","year":"2014","unstructured":"Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1647314.1647332"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1353\/lan.2006.0130"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2021.616471"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09866-x"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019. Association for Computational Linguistics, 4171\u20134186."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025644"},{"key":"e_1_3_2_1_13_1","volume-title":"Group communication analysis: A computational linguistics approach for detecting sociocognitive roles in multiparty interactions. Behavior research methods 51","author":"Dowell MM","year":"2019","unstructured":"Nia\u00a0MM Dowell, Tristan\u00a0M Nixon, and Arthur\u00a0C Graesser. 2019. Group communication analysis: A computational linguistics approach for detecting sociocognitive roles in multiparty interactions. Behavior research methods 51 (2019), 1007\u20131041."},{"key":"e_1_3_2_1_14_1","volume-title":"Some signals and rules for taking speaking turns in conversations.Journal of personality and social psychology 23, 2","author":"Duncan Starkey","year":"1972","unstructured":"Starkey Duncan. 1972. Some signals and rules for taking speaking turns in conversations.Journal of personality and social psychology 23, 2 (1972), 283."},{"key":"e_1_3_2_1_15_1","volume-title":"Interactional units in conversation: Syntactic, intonational, and pragmatic resources for the management of turns. Studies in interactional sociolinguistics 13","author":"Ford E","year":"1996","unstructured":"Cecilia\u00a0E Ford and Sandra\u00a0A Thompson. 1996. Interactional units in conversation: Syntactic, intonational, and pragmatic resources for the management of turns. Studies in interactional sociolinguistics 13 (1996), 134\u2013184."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01563"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2010.10.003"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2663204.2663277"},{"key":"e_1_3_2_1_19_1","first-page":"364","article-title":"Prediction of turn-taking using multitask learning with prediction of backchannels and fillers","volume":"162","author":"Hara Kohei","year":"2018","unstructured":"Kohei Hara, Koji Inoue, Katsuya Takanashi, and Tatsuya Kawahara. 2018. Prediction of turn-taking using multitask learning with prediction of backchannels and fillers. Listener 162 (2018), 364.","journal-title":"Listener"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0136905"},{"key":"e_1_3_2_1_21_1","volume-title":"The 10th International Conference on Autonomous Agents and Multiagent Systems-Volume 3. Citeseer, 1289\u20131290","author":"Huang Lixing","year":"2011","unstructured":"Lixing Huang, Louis-Philippe Morency, and Jonathan Gratch. 2011. A multimodal end-of-turn prediction model: learning from parasocial consensus sampling. In The 10th International Conference on Autonomous Agents and Multiagent Systems-Volume 3. Citeseer, 1289\u20131290."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2820755"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178385"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2522848.2522856"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1162\/pres_a_00358"},{"key":"e_1_3_2_1_26_1","first-page":"1","article-title":"S2M-Net: Speech Driven Three-party Conversational Motion Synthesis Networks. In Proceedings of the 15th ACM SIGGRAPH Conference on Motion","volume":"2","author":"Jin Aobo","year":"2022","unstructured":"Aobo Jin, Qixin Deng, and Zhigang Deng. 2022. S2M-Net: Speech Driven Three-party Conversational Motion Synthesis Networks. In Proceedings of the 15th ACM SIGGRAPH Conference on Motion, Interaction and Games. 2:1\u20132:10.","journal-title":"Interaction and Games."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340250"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499474.2499481"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2012-226"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/AFGR.2000.840610"},{"key":"e_1_3_2_1_31_1","volume-title":"An analysis of turn-taking and backchannels based on prosodic and syntactic features in Japanese map task dialogs. Language and speech 41, 3-4","author":"Koiso Hanae","year":"1998","unstructured":"Hanae Koiso, Yasuo Horiuchi, Syun Tutiya, Akira Ichikawa, and Yasuharu Den. 1998. An analysis of turn-taking and backchannels based on prosodic and syntactic features in Japanese map task dialogs. Language and speech 41, 3-4 (1998), 295\u2013321."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340555.3353727"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3652988.3673915"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3614139"},{"key":"e_1_3_2_1_35_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2014.02.002"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-21669-5_21"},{"key":"e_1_3_2_1_38_1","volume-title":"Investigating speech features for continuous turn-taking prediction using lstms. arXiv preprint arXiv:1806.11461","author":"Roddy Matthew","year":"2018","unstructured":"Matthew Roddy, Gabriel Skantze, and Naomi Harte. 2018. Investigating speech features for continuous turn-taking prediction using lstms. arXiv preprint arXiv:1806.11461 (2018)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3242997"},{"key":"e_1_3_2_1_40_1","volume-title":"Studies in the organization of conversational interaction","author":"Sacks Harvey","unstructured":"Harvey Sacks, Emanuel\u00a0A Schegloff, and Gail Jefferson. 1978. A simplest systematics for the organization of turn taking for conversation. In Studies in the organization of conversational interaction. Elsevier, 7\u201355."},{"key":"e_1_3_2_1_41_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Ryo Sato Ryuichiro Higashinaka Masafumi Tamoto Mikio Nakano and Kiyoaki Aikawa. 2002. Learning decision trees to determine turn-taking by spoken dialogue systems.. In INTERSPEECH.","DOI":"10.21437\/ICSLP.2002-293"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2006-550"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-5527"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2020.101178"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2820749"},{"key":"e_1_3_2_1_47_1","volume-title":"Prosodic patterns in English conversation","author":"Ward G","unstructured":"Nigel\u00a0G Ward. 2019. Prosodic patterns in English conversation. Cambridge University Press."}],"event":{"name":"ICMI '24: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","location":"San Jose Costa Rica","acronym":"ICMI '24"},"container-title":["International Conference on Multimodel Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678957.3685742","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3678957.3685742","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:12Z","timestamp":1750295412000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678957.3685742"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"references-count":47,"alternative-id":["10.1145\/3678957.3685742","10.1145\/3678957"],"URL":"https:\/\/doi.org\/10.1145\/3678957.3685742","relation":{},"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"2024-11-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}