{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:04:44Z","timestamp":1765357484363,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The Hong Kong Polytechnic University","award":["P0039489"],"award-info":[{"award-number":["P0039489"]}]},{"name":"Hong Kong Research Grant Council","award":["15600219"],"award-info":[{"award-number":["15600219"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612870","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:30Z","timestamp":1698391650000},"page":"9586-9590","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Unveiling Subtle Cues: Backchannel Detection Using Temporal Multimodal Attention Networks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6267-0016","authenticated-orcid":false,"given":"Kangzhong","family":"Wang","sequence":"first","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3591-9089","authenticated-orcid":false,"given":"MK Michael","family":"Cheung","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0907-7998","authenticated-orcid":false,"given":"Youqian","family":"Zhang","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5435-6083","authenticated-orcid":false,"given":"Chunxi","family":"Yang","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9055-4239","authenticated-orcid":false,"given":"Peter Q.","family":"Chen","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1048-1904","authenticated-orcid":false,"given":"Eugene Yujun","family":"Fu","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2027-168X","authenticated-orcid":false,"given":"Grace","family":"Ngai","sequence":"additional","affiliation":[{"name":"The Hong Kong Polytechnic University, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Backchannel Detection and Agreement Estimation from Video with Transformer Networks. arXiv preprint arXiv:2306.01656","author":"Amer Ahmed","year":"2023","unstructured":"Ahmed Amer, Chirag Bhuvaneshwara, Gowtham K Addluri, Mohammed M Shaik, Vedant Bonde, and Philipp M\u00fcller. 2023. Backchannel Detection and Agreement Estimation from Video with Transformer Networks. arXiv preprint arXiv:2306.01656 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473","author":"Bahdanau Dzmitry","year":"2014","unstructured":"Dzmitry Bahdanau, Kyunghyun Cho, and Yoshua Bengio. 2014. Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00019"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15892-6_21"},{"key":"e_1_3_2_1_5_1","volume-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555","author":"Chung Junyoung","year":"2014","unstructured":"Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. 2014. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555 (2014)."},{"volume-title":"Face-to-face interaction: Research, methods, and theory","author":"Duncan Starkey","key":"e_1_3_2_1_6_1","unstructured":"Starkey Duncan and Donald W Fiske. 2015. Face-to-face interaction: Research, methods, and theory. Routledge."},{"key":"e_1_3_2_1_7_1","volume-title":"Stephen CF Chan, and Daniel TL Shek.","author":"Fu Eugene Yujun","year":"2023","unstructured":"Eugene Yujun Fu, Grace Ngai, Hong Va Leong, Stephen CF Chan, and Daniel TL Shek. 2023. Using attention-based neural networks for predicting student learning outcomes in service-learning. Education and Information Technologies (2023), 1--27."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3479230"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2005-400"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the Conference. association for Computational Linguistics. meeting","volume":"2018","author":"Gu Yue","year":"2018","unstructured":"Yue Gu, Kangning Yang, Shiyu Fu, Shuhong Chen, Xinyu Li, and Ivan Marsic. 2018. Hybrid attention based multimodal network for spoken language classification. In Proceedings of the Conference. association for Computational Linguistics. meeting, Vol. 2018. NIH Public Access, 2379."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-29513-4_31"},{"key":"e_1_3_2_1_12_1","first-page":"161","article-title":"Classense: a mobile digital backchannel system for monitoring class morale","volume":"1","author":"Jiranantanagorn Peerumporn","year":"2015","unstructured":"Peerumporn Jiranantanagorn, Haifeng Shen, Robert Goodwin, and Kung-Keat Teoh. 2015. Classense: a mobile digital backchannel system for monitoring class morale. International Journal of Learning and Teaching 1, 2 (2015), 161--167.","journal-title":"International Journal of Learning and Teaching"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1037\/0022-3514.35.7.523"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Jing Liu Mitja Nikolaus K\u00fcbra Bodur and Abdellah Fourtassi. 2022. Predicting backchannel signaling in child-caregiver multimodal conversations. In Companion publication of the 2022 international conference on multimodal interaction. 196--200.","DOI":"10.1145\/3536220.3563372"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613851"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3551589"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172944.3172969"},{"key":"e_1_3_2_1_18_1","first-page":"14200","article-title":"Attention bottlenecks for multimodal fusion","volume":"34","author":"Nagrani Arsha","year":"2021","unstructured":"Arsha Nagrani, Shan Yang, Anurag Arnab, Aren Jansen, Cordelia Schmid, and Chen Sun. 2021. Attention bottlenecks for multimodal fusion. Advances in Neural Information Processing Systems 34 (2021), 14200--14213.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.03.091"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054223"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15892-6_16"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1017319110294"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3551605"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.2478\/jaiscr-2019-0006"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.119899"},{"key":"e_1_3_2_1_26_1","volume-title":"2019 34th IEEE\/ACM International Conference on Automated Software Engineering (ASE). IEEE, 13--25","author":"Shu Jingdong","year":"2019","unstructured":"YaoWan, Jingdong Shu, Yulei Sui, Guandong Xu, Zhou Zhao, JianWu, and Philip Yu. 2019. Multi-modal attention network learning for semantic source code retrieval. In 2019 34th IEEE\/ACM International Conference on Automated Software Engineering (ASE). IEEE, 13--25."},{"key":"e_1_3_2_1_27_1","first-page":"38571","article-title":"Vitpose: Simple vision transformer baselines for human pose estimation","volume":"35","author":"Xu Yufei","year":"2022","unstructured":"Yufei Xu, Jing Zhang, Qiming Zhang, and Dacheng Tao. 2022. Vitpose: Simple vision transformer baselines for human pose estimation. Advances in Neural Information Processing Systems 35 (2022), 38571--38584.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612870","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612870","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:01:16Z","timestamp":1755820876000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612870"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":27,"alternative-id":["10.1145\/3581783.3612870","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612870","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}