{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T11:29:49Z","timestamp":1764588589083,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3688986","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"11383-11389","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Towards Engagement Prediction: A Cross-Modality Dual-Pipeline Approach using Visual and Audio Features"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8622-7251","authenticated-orcid":false,"given":"Deepak","family":"Kumar","sequence":"first","affiliation":[{"name":"Indian Institute of Technology Roorkee, Roorkee, Uttarakhand, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3774-8117","authenticated-orcid":false,"given":"Surbhi","family":"Madan","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology Ropar, Rupnagar, Punjab, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5372-3355","authenticated-orcid":false,"given":"Pradeep","family":"Singh","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology Roorkee, Roorkee, Uttarakhand, 
India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2230-1440","authenticated-orcid":false,"given":"Abhinav","family":"Dhall","sequence":"additional","affiliation":[{"name":"Flinders University & Indian Institute of Technology Ropar, Adelaide, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6277-6267","authenticated-orcid":false,"given":"Balasubramanian","family":"Raman","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology Roorkee, Roorkee, Uttarakhand, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1177\/1096348017753521"},{"key":"e_1_3_2_1_2_1","volume-title":"Manuel Montes-y G\u00f3mez, and Fabio A. Gonz\u00e1lez","author":"Arevalo John","year":"2020","unstructured":"John Arevalo, Thamar Solorio, Manuel Montes-y G\u00f3mez, and Fabio A. Gonz\u00e1lez. 2020. Gated multimodal networks. (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548363"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2798607"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2016.7477553"},{"key":"e_1_3_2_1_6_1","volume-title":"David Dale, Ning Dong, Mark Duppenthaler, Paul-Ambroise Duquenne, Brian Ellis, Hady Elsahar, Justin Haaheim, et al.","author":"Chung Yu-An","year":"2023","unstructured":"Lo\u00efc Barrault, Yu-An Chung, Mariano Coria Meglioli, David Dale, Ning Dong, Mark Duppenthaler, Paul-Ambroise Duquenne, Brian Ellis, Hady Elsahar, Justin Haaheim, et al. 2023. Seamless: Multilingual Expressive and Streaming Speech Translation. 
arXiv preprint arXiv:2312.05187 (2023)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136780"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3616545"},{"key":"e_1_3_2_1_9_1","first-page":"175","article-title":"Advanced, analytic, automated (AAA) measurement of engagement during learning","volume":"50","author":"D'Mello Sidney K","year":"2015","unstructured":"Sidney K D'Mello, Erik Dieterle, and Angela Duckworth. 2015. Advanced, analytic, automated (AAA) measurement of engagement during learning. Educational Psychologist, Vol. 50, 3 (2015), 175--189.","journal-title":"Educational Psychologist"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Florian Eyben Klaus R Scherer Bj\u00f6rn W Schuller Johan Sundberg Elisabeth Andr\u00e9 Carlos Busso Laurence Y Devillers Julien Epps Petri Laukka Shrikanth S Narayanan et al. 2015. The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE transactions on affective computing Vol. 7 2 (2015) 190--202.","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10648-019-09514-z"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340555.3355714"},{"volume-title":"Prediction and localization of student engagement in the wild. In 2018 Digital Image Computing: Techniques and Applications (DICTA)","author":"Kaur Amanjot","key":"e_1_3_2_1_14_1","unstructured":"Amanjot Kaur, Aamir Mustafa, Love Mehta, and Abhinav Dhall. 2018. Prediction and localization of student engagement in the wild. In 2018 Digital Image Computing: Techniques and Applications (DICTA). IEEE, 1--8."},{"key":"e_1_3_2_1_15_1","unstructured":"G\u00fcnter Klambauer Thomas Unterthiner Andreas Mayr and Sepp Hochreiter. 2017. Self-normalizing neural networks. 
In Advances in Neural Information Processing Systems (NeurIPS). 972--981."},{"key":"e_1_3_2_1_16_1","volume-title":"Speech-Based Automatic Prediction of Interview Traits. In International Conference on Computer Vision and Image Processing. Springer, 586--596","author":"Kumar Deepak","year":"2022","unstructured":"Deepak Kumar and Balasubramanian Raman. 2022. Speech-Based Automatic Prediction of Interview Traits. In International Conference on Computer Vision and Image Processing. Springer, 586--596."},{"key":"e_1_3_2_1_17_1","volume-title":"Efficient Low-rank Multimodal Fusion With Modality-Specific Factors. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:44131945","author":"Liu Zhun","year":"2018","unstructured":"Zhun Liu, Ying Shen, Varun Bharadhwaj Lakshminarasimhan, Paul Pu Liang, Amir Zadeh, and Louis-Philippe Morency. 2018. Efficient Low-rank Multimodal Fusion With Modality-Specific Factors. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:44131945"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3462244.3479901"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612858"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3689004"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613851"},{"key":"e_1_3_2_1_22_1","unstructured":"MultiMediate. 2024. MultiMediate Challenge 2024. 
Retrieved from https:\/\/multimediate-challenge.org."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3551589"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172944.3172969"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3204493.3204549"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3479219"},{"key":"e_1_3_2_1_27_1","volume-title":"Kun Guo, Seop Hyeong Park, and Hongliang Ding.","author":"Pan Sicheng","year":"2023","unstructured":"Sicheng Pan, Gary JW Xu, Kun Guo, Seop Hyeong Park, and Hongliang Ding. 2023. Video-based engagement estimation of game streamers: An interpretable multimodal neural network approach. IEEE Transactions on Games (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. 
PMLR, 8748--8763."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3614164"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2388676.2388684"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1656"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612857"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00044"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2316163"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612873"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612852"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3688986","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3688986","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:29Z","timestamp":1750295849000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3688986"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":36,"alternative-id":["10.1145\/3664647.3688986","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3688986","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}