{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,27]],"date-time":"2026-06-27T20:21:40Z","timestamp":1782591700697,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["490909448"],"award-info":[{"award-number":["490909448"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3762076","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:17Z","timestamp":1761375257000},"page":"14150-14155","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["<scp>MultiMediate<\/scp>\n                    '25: Cross-cultural Multi-domain Engagement Estimation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-8831-9795","authenticated-orcid":false,"given":"Daksitha Senel","family":"Withanage Don","sequence":"first","affiliation":[{"name":"University of Augsburg, Augsburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3064-599X","authenticated-orcid":false,"given":"Marius","family":"Funk","sequence":"additional","affiliation":[{"name":"University of Augsburg, Augsburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7153-9984","authenticated-orcid":false,"given":"Michal","family":"Balazia","sequence":"additional","affiliation":[{"name":"INRIA Sophia Antipolis, Sophia Antipolis, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7792-0241","authenticated-orcid":false,"given":"Huajian","family":"Qiu","sequence":"additional","affiliation":[{"name":"University of Stuttgart, Stuttgart, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9260-0403","authenticated-orcid":false,"given":"Shogo","family":"Okada","sequence":"additional","affiliation":[{"name":"JAIST, Ishikawa, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2988-2142","authenticated-orcid":false,"given":"Fran\u00e7ois","family":"Br\u00e9mond","sequence":"additional","affiliation":[{"name":"INRIA Sophia Antipolis, Sophia Antipolis, France"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6676-3145","authenticated-orcid":false,"given":"Jan","family":"Alexandersson","sequence":"additional","affiliation":[{"name":"DFKI, Saarbr\u00fccken, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6317-7303","authenticated-orcid":false,"given":"Andreas","family":"Bulling","sequence":"additional","affiliation":[{"name":"University of Stuttgart, Stuttgart, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2367-162X","authenticated-orcid":false,"given":"Elisabeth","family":"Andr\u00e9","sequence":"additional","affiliation":[{"name":"University of Augsburg, Augsburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7037-7100","authenticated-orcid":false,"given":"Philipp","family":"M\u00fcller","sequence":"additional","affiliation":[{"name":"DFKI, Saarbr\u00fccken, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"WhisperX: Time-Accurate Speech Transcription of Long-Form Audio. INTERSPEECH 2023","author":"Bain Max","year":"2023","unstructured":"Max Bain, Jaesung Huh, Tengda Han, and Andrew Zisserman. 2023. WhisperX: Time-Accurate Speech Transcription of Long-Form Audio. INTERSPEECH 2023 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548363"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00019"},{"key":"e_1_3_2_1_4_1","volume-title":"David Dale, Ning Dong, Mark Duppenthaler, Paul-Ambroise Duquenne, Brian Ellis, Hady Elsahar, Justin Haaheim, et al.","author":"Barrault Lo\u00efc","year":"2023","unstructured":"Lo\u00efc Barrault, Yu-An Chung, Mariano Coria Meglioli, David Dale, Ning Dong, Mark Duppenthaler, Paul-Ambroise Duquenne, Brian Ellis, Hady Elsahar, Justin Haaheim, et al. 2023. Seamless: Multilingual Expressive and Streaming Speech Translation. arXiv preprint arXiv:2312.05187 (2023)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2401836.2401846"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136780"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_1_8_1","volume-title":"Unsupervised Cross-lingual Representation Learning at Scale. CoRR abs\/1911.02116","author":"Conneau Alexis","year":"2019","unstructured":"Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzm\u00e1n, Edouard Grave, Myle Ott, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Unsupervised Cross-lingual Representation Learning at Scale. CoRR abs\/1911.02116 (2019). arXiv:1911.02116 http:\/\/arxiv.org\/abs\/1911.02116"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3678957.3685757"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10648-019-09514-z"},{"key":"e_1_3_2_1_12_1","unstructured":"Pooja Guhan Naman Awasthi Kristin Bussell Dinesh Manocha Gloria Reeves Aniket Bera et al. 2020. Developing an Effective and Automated Patient Engagement Estimator for Telehealth: A Machine Learning Approach. arXiv preprint arXiv:2011.08690 (2020)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP51287.2024.10647692"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aaz3791"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--78114--9_19"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1915768117"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3688988"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.2307\/2532051"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3689004"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613851"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3551589"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3479219"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3204493.3204549"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3172944.3172969"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2020.00092"},{"key":"e_1_3_2_1_28_1","unstructured":"Tom O'Malley Elie Bursztein James Long Fran\u00e7ois Chollet Haifeng Jin Luca Invernizzi et al. 2019. KerasTuner. https:\/\/github.com\/keras-team\/keras-tuner."},{"key":"e_1_3_2_1_29_1","volume-title":"Shang-Wen Li, Ishan Misra, Michael Rabbat, Vasu Sharma, Gabriel Synnaeve, Hu Xu, Herve Jegou, Julien Mairal, Patrick Labatut, Armand Joulin, and Piotr Bojanowski.","author":"Oquab Maxime","year":"2024","unstructured":"Maxime Oquab, Timoth\u00e9e Darcet, Th\u00e9o Moutakanni, Huy V. Vo, Marc Szafraniec, Vasil Khalidov, Pierre Fernandez, Daniel HAZIZA, Francisco Massa, Alaaeldin El-Nouby, Mido Assran, Nicolas Ballas, Wojciech Galuba, Russell Howes, Po- Yao Huang, Shang-Wen Li, Ishan Misra, Michael Rabbat, Vasu Sharma, Gabriel Synnaeve, Hu Xu, Herve Jegou, Julien Mairal, Patrick Labatut, Armand Joulin, and Piotr Bojanowski. 2024. DINOv2: Learning Robust Visual Features without Supervision. Transactions on Machine Learning Research (2024). https:\/\/openreview.net\/forum?id=a68SUt6zFt Featured Certification."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301687"},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_32_1","volume-title":"International Conference on Machine Learning. PMLR, 28492--28518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, JongWook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International Conference on Machine Learning. PMLR, 28492--28518."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2015.7163129"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/HRI.2010.5453163"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/1957656.1957781"},{"key":"e_1_3_2_1_36_1","volume-title":"Elisabeth Andr\u00e9, and Tobias Baur.","author":"Schiller Dominik","year":"2024","unstructured":"Dominik Schiller, Tobias Hallmen, Daksitha Withanage Don, Elisabeth Andr\u00e9, and Tobias Baur. 2024. DISCOVER: A Data-driven Interactive System for Comprehensive Observation, Visualization, and ExploRation of Human Behaviour. arXiv preprint arXiv:2407.13408 (2024)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988257.2988258"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.clsr.2021.105567"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01398"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0378--2166(99)00109--5"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3678957.3685704"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3762077"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3762079"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3762078"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3762076","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:37:26Z","timestamp":1765309046000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3762076"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":44,"alternative-id":["10.1145\/3746027.3762076","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3762076","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}