{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T16:03:17Z","timestamp":1781798597573,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Anhui Province Key Research and Development Program","award":["202104a05020007"],"award-info":[{"award-number":["202104a05020007"]}]},{"name":"Natural Science Foundation of China","award":["62276242"],"award-info":[{"award-number":["62276242"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612852","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"9496-9500","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Sliding Window Seq2seq Modeling for Engagement Estimation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3197-8103","authenticated-orcid":false,"given":"Jun","family":"Yu","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8974-3813","authenticated-orcid":false,"given":"Keda","family":"Lu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3891-9207","authenticated-orcid":false,"given":"Mohan","family":"Jing","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8488-991X","authenticated-orcid":false,"given":"Ziqi","family":"Liang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6000-6631","authenticated-orcid":false,"given":"Bingyuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3598-8564","authenticated-orcid":false,"given":"Jianqing","family":"Sun","sequence":"additional","affiliation":[{"name":"Unisound AI Technology Co.,Ltd, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8309-1301","authenticated-orcid":false,"given":"Jiaen","family":"Liang","sequence":"additional","affiliation":[{"name":"Unisound AI Technology Co.,Ltd, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Soundnet: Learning sound representations from unlabeled video. Advances in neural information processing systems","author":"Aytar Yusuf","year":"2016","unstructured":"Yusuf Aytar, Carl Vondrick, and Antonio Torralba. 2016. Soundnet: Learning sound representations from unlabeled video. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00019"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2401836.2401846"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136780"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/taffc.2015.2457417"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10648-019-09514-z"},{"key":"e_1_3_2_2_8_1","volume-title":"Predicting Backchanneling and Disengagement in Children. arXiv: Human-Computer Interaction (Jul","author":"Goswami Mononito","year":"2020","unstructured":"Mononito Goswami, Minkush Manuja, and Maitree Leekha. 2020. Towards Social & Engaging Peer Learning: Predicting Backchanneling and Disengagement in Children. arXiv: Human-Computer Interaction (Jul 2020)."},{"key":"e_1_3_2_2_9_1","volume-title":"Generating Sequences With Recurrent Neural Networks. arXiv: Neural and Evolutionary Computing (Aug","author":"Graves Alex","year":"2013","unstructured":"Alex Graves. 2013. Generating Sequences With Recurrent Neural Networks. arXiv: Neural and Evolutionary Computing (Aug 2013)."},{"key":"e_1_3_2_2_10_1","volume-title":"Kristin Bussell, Dinesh Manocha, Gloria Reeves, and Aniket Bera.","author":"Guhan Pooja","year":"2020","unstructured":"Pooja Guhan, Naman Awasthi, andKathryn McDonald, Kristin Bussell, Dinesh Manocha, Gloria Reeves, and Aniket Bera. 2020. MET: Multimodal Perception of Engagement for Telehealth. (Nov 2020)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2016.90"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","unstructured":"Shofiyati Nur Karimah and Shinobu Hasegawa. 2021. Automatic Engagement Recognition for Distance Learning Systems: A Literature Study of Engagement Datasets and Methods. 264--276. https:\/\/doi.org\/10.1007\/978-3-030-78114-9_19","DOI":"10.1007\/978-3-030-78114-9_19"},{"key":"e_1_3_2_2_14_1","volume-title":"Prediction and localization of student engagement in the wild. In 2018 Digital Image Computing: Techniques and Applications (DICTA)","author":"Kaur Amanjot","unstructured":"Amanjot Kaur, Aamir Mustafa, Love Mehta, and Abhinav Dhall. 2018. Prediction and localization of student engagement in the wild. In 2018 Digital Image Computing: Techniques and Applications (DICTA). IEEE, 1--8."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.2307\/2532051"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613851"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2522848.2522865"},{"key":"e_1_3_2_2_18_1","volume-title":"Recognizing engagement in human-robot interaction. Human-Robot Interaction (Mar","author":"Rich Charles","year":"2010","unstructured":"Charles Rich, Brett Ponsleur, Aaron Holroyd, and CandaceL. Sidner. 2010. Recognizing engagement in human-robot interaction. Human-Robot Interaction (Mar 2010)."},{"key":"e_1_3_2_2_19_1","unstructured":"Khaled Saleh Kun Yu and Fang Chen. [n. d.]. Video-Based Student Engagement Estimation via Time Convolution Neural Networks for Remote Learning. ([n. d.])."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1957656.1957781"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"e_1_3_2_2_22_1","volume-title":"AVEC 2016 - Depression, Mood, and Emotion Recognition Workshop and Challenge. (May","author":"Valstar Michel","year":"2016","unstructured":"Michel Valstar, Jonathan Gratch, Bj\u00f6rn Schuller, Fabien Ringeval, Denis Lalanne, MercedesTorres Torres, Stefan Scherer, Giota Stratou, Roddy Cowie, and Maja Pantic. 2016. AVEC 2016 - Depression, Mood, and Emotion Recognition Workshop and Challenge. (May 2016)."},{"key":"e_1_3_2_2_23_1","volume-title":"Attention is All you Need. Neural Information Processing Systems (Jun","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, AidanN. Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. Neural Information Processing Systems (Jun 2017)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3264981"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2007.1110"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612852","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612852","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:13:57Z","timestamp":1755821637000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612852"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":25,"alternative-id":["10.1145\/3581783.3612852","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612852","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}