{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T08:00:36Z","timestamp":1776931236709,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,10,12]],"date-time":"2026-10-12T00:00:00Z","timestamp":1791763200000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["IIS 2005430"],"award-info":[{"award-number":["IIS 2005430"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,13]]},"DOI":"10.1145\/3716553.3750755","type":"proceedings-article","created":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T13:13:16Z","timestamp":1760188396000},"page":"200-208","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Gaze Prediction in Multi-Party Conversations via Speaker-Aware Multimodal Adaptation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9726-1153","authenticated-orcid":false,"given":"Meng-Chen","family":"Lee","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Houston, Houston, Texas, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2571-5865","authenticated-orcid":false,"given":"Zhigang","family":"Deng","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Houston, Houston, Texas, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Reginald\u00a0B Adams\u00a0Jr and Robert\u00a0E Kleck. 2005. Effects of direct and averted gaze on the perception of facially communicated emotion. Emotion 5 1 (2005) 3.","DOI":"10.1037\/1528-3542.5.1.3"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.cmcl-1.8"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Geert Br\u00f4ne Bert Oben Annelies Jehoul Jelena Vranjes and Kurt Feyaerts. 2017. Eye gaze and viewpoint in multimodal interaction management. Cognitive Linguistics 28 3 (2017) 449\u2013483.","DOI":"10.1515\/cog-2016-0119"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00544"},{"key":"e_1_3_3_1_6_2","unstructured":"Jacob Devlin. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025644"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Alexandra Frischen Andrew\u00a0P Bayliss and Steven\u00a0P Tipper. 2007. Gaze cueing of attention: visual attention social cognition and individual differences. Psychological bulletin 133 4 (2007) 694.","DOI":"10.1037\/0033-2909.133.4.694"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Tzvi Ganel Yonatan Goshen-Gottstein and Melvyn\u00a0A Goodale. 2005. Interactions between the processing of gaze direction and facial expression. Vision research 45 9 (2005) 1191\u20131200.","DOI":"10.1016\/j.visres.2004.06.025"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412330"},{"key":"e_1_3_3_1_11_2","unstructured":"Jia-Chen Gu Chongyang Tao Zhen-Hua Ling Can Xu Xiubo Geng and Daxin Jiang. 2021. MPC-BERT: A pre-trained language model for multi-party conversation understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.01541 (2021)."},{"key":"e_1_3_3_1_12_2","first-page":"1590","volume-title":"Proceedings of the Asian Conference on Computer Vision","author":"Guo Hang","year":"2022","unstructured":"Hang Guo, Zhengxi Hu, and Jingtai Liu. 2022. Mgtr: End-to-end mutual gaze detection with transformer. In Proceedings of the Asian Conference on Computer Vision. 1590\u20131605."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Carlos\u00a0T Ishi Daichi Machiyashiki Ryusuke Mikata and Hiroshi Ishiguro. 2018. A speech-driven hand gesture generation method and evaluation in android robots. IEEE Robotics and Automation Letters 3 4 (2018) 3757\u20133764.","DOI":"10.1109\/LRA.2018.2856281"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3561975.3562954"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Aobo Jin Qixin Deng Yuting Zhang and Zhigang Deng. 2019. A deep learning-based model for head and eye motion generation in three-party conversations. Proceedings of the ACM on Computer Graphics and Interactive Techniques 2 2 (2019) 1\u201319.","DOI":"10.1145\/3340250"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Kristiina Jokinen Hirohisa Furukawa Masafumi Nishida and Seiichi Yamamoto. 2013. Gaze and turn-taking behavior in casual conversational interactions. ACM Transactions on Interactive Intelligent Systems (TiiS) 3 2 (2013) 1\u201330.","DOI":"10.1145\/2499474.2499481"},{"key":"e_1_3_3_1_17_2","first-page":"1","volume-title":"Proc. International Conference on Methods and Techniques in Behavioral Research","author":"Kraaij Wessel","year":"2005","unstructured":"Wessel Kraaij, Thomas Hain, Mike Lincoln, and Wilfried Post. 2005. The AMI meeting corpus. In Proc. International Conference on Methods and Techniques in Behavioral Research. 1\u20134."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Liliana Laranjo Adam\u00a0G Dunn Huong\u00a0Ly Tong Ahmet\u00a0Baki Kocaballi Jessica Chen Rabia Bashir Didi Surian Blanca Gallego Farah Magrabi Annie\u00a0YS Lau et\u00a0al. 2018. Conversational agents in healthcare: a systematic review. Journal of the American Medical Informatics Association 25 9 (2018) 1248\u20131258.","DOI":"10.1093\/jamia\/ocy072"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1199"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3678957.3685742"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3652988.3673915"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3614139"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.cmcl-1.9"},{"key":"e_1_3_3_1_24_2","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1711.05101 (2017)."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-4640"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.cmcl-1.11"},{"key":"e_1_3_3_1_27_2","first-page":"171","volume-title":"Proceedings of Symposium on Human Interface 2011","author":"Otsuka Kazuhiro","year":"2011","unstructured":"Kazuhiro Otsuka. 2011. Multimodal conversation scene analysis for understanding people\u2019s communicative behaviors in face-to-face meetings. In Proceedings of Symposium on Human Interface 2011. Springer, 171\u2013179."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1231"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_3_1_30_2","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter\u00a0J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of machine learning research 21 140 (2020) 1\u201367."},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/2401836.2401841"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593580"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00224"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.cmcl-1.16"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i9.28883"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.273"}],"event":{"name":"ICMI '25: International Conference on Multimodal Interaction","location":"Canberra Australia","acronym":"ICMI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 27th International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3716553.3750755","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3716553.3750755","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T22:27:32Z","timestamp":1769466452000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3716553.3750755"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":35,"alternative-id":["10.1145\/3716553.3750755","10.1145\/3716553"],"URL":"https:\/\/doi.org\/10.1145\/3716553.3750755","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}