{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T11:29:51Z","timestamp":1764588591756,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,4]],"date-time":"2024-11-04T00:00:00Z","timestamp":1730678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Deutsche Forschungsgemeinschaft (DFG)","award":["490909448"],"award-info":[{"award-number":["490909448"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,4]]},"DOI":"10.1145\/3678957.3685704","type":"proceedings-article","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T04:35:53Z","timestamp":1730262953000},"page":"428-438","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards Automated Annotation of Infant-Caregiver Engagement Phases with Multimodal Foundation Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-8831-9795","authenticated-orcid":false,"given":"Daksitha Senel","family":"Withanage Don","sequence":"first","affiliation":[{"name":"Chair for Human-Centered Artificial Intelligence, University of Augsburg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7364-5772","authenticated-orcid":false,"given":"Dominik","family":"Schiller","sequence":"additional","affiliation":[{"name":"Chair for Human-Centered Artificial Intelligence, University of Augsburg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6450-5694","authenticated-orcid":false,"given":"Tobias","family":"Hallmen","sequence":"additional","affiliation":[{"name":"Chair for Human-Centered Artificial Intelligence, University of Augsburg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5230-5218","authenticated-orcid":false,"given":"Silvan","family":"Mertes","sequence":"additional","affiliation":[{"name":"Chair for Human-Centered Artificial Intelligence, University of Augsburg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2797-605X","authenticated-orcid":false,"given":"Tobias","family":"Baur","sequence":"additional","affiliation":[{"name":"Chair for Human-Centered Artificial Intelligence, University of Augsburg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1582-9850","authenticated-orcid":false,"given":"Florian","family":"Lingenfelser","sequence":"additional","affiliation":[{"name":"Chair for Human-Centered Artificial Intelligence, University of Augsburg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8431-540X","authenticated-orcid":false,"given":"Mitho","family":"M\u00fcller","sequence":"additional","affiliation":[{"name":"Department of Psychology, Ludwig-Maximilians-University Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6540-3140","authenticated-orcid":false,"given":"Lea","family":"Kaubisch","sequence":"additional","affiliation":[{"name":"Department of Psychology, Ludwig-Maximilians-University Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0759-1563","authenticated-orcid":false,"given":"Prof. Dr. Corinna","family":"Reck","sequence":"additional","affiliation":[{"name":"Department of Psychology, Ludwig Maximilian University of Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2367-162X","authenticated-orcid":false,"given":"Elisabeth","family":"Andr\u00e9","sequence":"additional","affiliation":[{"name":"Chair for Human-Centered Artificial Intelligence, University of Augsburg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,11,4]]},"reference":[{"volume-title":"Patterns of Attachment: A Psychological Study of the Strange Situation","author":"Ainsworth Mary","unstructured":"Mary D.\u00a0S. Ainsworth, Mary\u00a0C. Blehar, Everett Waters, and Sally Wall. 1978. Patterns of Attachment: A Psychological Study of the Strange Situation. Lawrence Erlbaum, Oxford.","key":"e_1_3_2_1_1_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_2_1","DOI":"10.1007\/s13218-020-00632-3"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1093\/pch"},{"volume-title":"Mother-infant interaction","author":"Bornstein H","unstructured":"Marc\u00a0H Bornstein and Catherine\u00a0S Tamis-LeMonda. 2001. Mother-infant interaction. In Blackwell handbook of infant development, J.\u00a0Gavin Bremner and Alan Fogel (Eds.). Blackwell Publishing, Malden, 269\u2013295.","key":"e_1_3_2_1_4_1"},{"key":"e_1_3_2_1_5_1","volume-title":"Hogarth","author":"Bowlby John","year":"1982","unstructured":"John Bowlby. 1969. Attachment and Loss, Volume 1: Attachment. Hogarth, London. Reprinted 1982."},{"volume-title":"Hogarth","author":"Bowlby John","unstructured":"John Bowlby. 1980. Attachment and Loss, Volume 3: Loss. Hogarth, London.","key":"e_1_3_2_1_6_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_7_1","DOI":"10.1109\/ICCV.2017.116"},{"doi-asserted-by":"crossref","unstructured":"Mathilde Caron Hugo Touvron Ishan Misra Herv\u00e9 J\u00e9gou Julien Mairal Piotr Bojanowski and Armand Joulin. 2021. Emerging Properties in Self-Supervised Vision Transformers. arxiv:2104.14294\u00a0[cs.CV]","key":"e_1_3_2_1_8_1","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_9_1","volume-title":"W2v-BERT: Combining Contrastive Learning and Masked Language Modeling for Self-Supervised Speech Pre-Training. CoRR abs\/2108.06209","author":"Chung Yu-An","year":"2021","unstructured":"Yu-An Chung, Yu Zhang, Wei Han, Chung-Cheng Chiu, James Qin, Ruoming Pang, and Yonghui Wu. 2021. W2v-BERT: Combining Contrastive Learning and Masked Language Modeling for Self-Supervised Speech Pre-Training. CoRR abs\/2108.06209 (2021). arXiv:2108.06209https:\/\/arxiv.org\/abs\/2108.06209"},{"doi-asserted-by":"crossref","unstructured":"Roseanne Clark. 1985. Parent-Child Early Relational Assessment (PCERA).","key":"e_1_3_2_1_10_1","DOI":"10.1037\/t07207-000"},{"key":"e_1_3_2_1_11_1","volume-title":"Seamless: Multilingual Expressive and Streaming Speech Translation. ArXiv.","author":"Communication Seamless","year":"2023","unstructured":"Seamless Communication, Lo\u00efc Barrault, Yu-An Chung, Mariano\u00a0Coria Meglioli, David Dale, Ning Dong, Mark Duppenthaler, Paul-Ambroise Duquenne, Brian Ellis, Hady Elsahar, Justin Haaheim, John Hoffman, Min-Jae Hwang, Hirofumi Inaguma, Christopher Klaiber, Ilia Kulikov, Pengwei Li, Daniel Licht, Jean Maillard, Ruslan Mavlyutov, Alice Rakotoarison, Kaushik\u00a0Ram Sadagopan, Abinesh Ramakrishnan, Tuan Tran, Guillaume Wenzek, Yilin Yang, Ethan Ye, Ivan Evtimov, Pierre Fernandez, Cynthia Gao, Prangthip Hansanti, Elahe Kalbassi, Amanda Kallet, Artyom Kozhevnikov, Gabriel Mejia, Robin\u00a0San Roman, Christophe Touret, Corinne Wong, Carleigh Wood, Bokai Yu, Pierre Andrews, Can Balioglu, Peng-Jen Chen, Marta\u00a0R. Costa-juss\u00e0, Maha Elbayad, Hongyu Gong, Francisco Guzm\u00e1n, Kevin Heffernan, Somya Jain, Justine Kao, Ann Lee, Xutai Ma, Alex Mourachko, Benjamin Peloquin, Juan Pino, Sravya Popuri, Christophe Ropers, Safiyyah Saleem, Holger Schwenk, Anna Sun, Paden Tomasello, Changhan Wang, Jeff Wang, Skyler Wang, and Mary Williamson. 2023. Seamless: Multilingual Expressive and Streaming Speech Translation. ArXiv."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1016\/j.infbeh.2010.01.001"},{"volume-title":"Coding interactive behavior manual. https:\/\/ruthfeldmanlab.com\/coding-schemes-interventions\/ Unpublished Manual","author":"Feldman R.","unstructured":"R. Feldman. 1998. Coding interactive behavior manual. https:\/\/ruthfeldmanlab.com\/coding-schemes-interventions\/ Unpublished Manual; Bar-Ilan University, Israel.","key":"e_1_3_2_1_13_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_14_1","DOI":"10.1111\/j.1469-7610.2006.01701.x"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_15_1","DOI":"10.1111\/j.1540-5834.2011.00660.x"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1037\/0012-1649.35.1.223"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/ICCV.2017.622"},{"key":"e_1_3_2_1_18_1","volume-title":"Bootstrap Your Own Latent: A New Approach to Self-Supervised Learning. CoRR abs\/2006.07733","author":"Grill Jean-Bastien","year":"2020","unstructured":"Jean-Bastien Grill, Florian Strub, Florent Altch\u00e9, Corentin Tallec, Pierre\u00a0H. Richemond, Elena Buchatskaya, Carl Doersch, Bernardo\u00a0\u00c1vila Pires, Zhaohan\u00a0Daniel Guo, Mohammad\u00a0Gheshlaghi Azar, Bilal Piot, Koray Kavukcuoglu, R\u00e9mi Munos, and Michal Valko. 2020. Bootstrap Your Own Latent: A New Approach to Self-Supervised Learning. CoRR abs\/2006.07733 (2020). arXiv:2006.07733https:\/\/arxiv.org\/abs\/2006.07733"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1109\/ACII.2019.8925519"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_20_1","DOI":"10.1145\/3382507.3418870"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_21_1","DOI":"10.1038\/tp.2016.82"},{"volume-title":"Pattern Recognition, Shivakumara Palaiahnakote, Gabriella Sanniti\u00a0di Baja, Liang Wang, and Wei\u00a0Qi Yan (Eds.)","author":"Li Honggai","unstructured":"Honggai Li, Jinshi Cui, Li Wang, and Hongbin Zha. 2020. Infant Attachment Prediction Using Vision and Audio Features in Mother-Infant Interaction. In Pattern Recognition, Shivakumara Palaiahnakote, Gabriella Sanniti\u00a0di Baja, Liang Wang, and Wei\u00a0Qi Yan (Eds.). Springer International Publishing, Cham, 489\u2013502.","key":"e_1_3_2_1_22_1"},{"key":"e_1_3_2_1_23_1","volume-title":"HEMM: Holistic Evaluation of Multimodal Foundation Models. arxiv:2407.03418\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2407.03418","author":"Liang Paul\u00a0Pu","year":"2024","unstructured":"Paul\u00a0Pu Liang, Akshay Goindani, Talha Chafekar, Leena Mathur, Haofei Yu, Ruslan Salakhutdinov, and Louis-Philippe Morency. 2024. HEMM: Holistic Evaluation of Multimodal Foundation Models. arxiv:2407.03418\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2407.03418"},{"key":"e_1_3_2_1_24_1","volume-title":"MediaPipe: A Framework for Building Perception Pipelines. CoRR abs\/1906.08172","author":"Lugaresi Camillo","year":"2019","unstructured":"Camillo Lugaresi, Jiuqiang Tang, Hadon Nash, Chris McClanahan, Esha Uboweja, Michael Hays, Fan Zhang, Chuo-Ling Chang, Ming\u00a0Guang Yong, Juhyun Lee, Wan-Teh Chang, Wei Hua, Manfred Georg, and Matthias Grundmann. 2019. MediaPipe: A Framework for Building Perception Pipelines. CoRR abs\/1906.08172 (2019). arXiv:1906.08172http:\/\/arxiv.org\/abs\/1906.08172"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1002\/mpr.1860"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_26_1","DOI":"10.1016\/j.dr.2009.02.001"},{"unstructured":"W.\u00a0R. Mills-Koonce and M. Cox. 2013. Qualitative Ratings for Parent\u2013Child Interaction at 3\u201348 Months of Age. (2013). Unpublished Manuscript.","key":"e_1_3_2_1_27_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_28_1","DOI":"10.1159\/000448404"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_29_1","DOI":"10.1159\/000376586"},{"unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby Mahmoud Assran Nicolas Ballas Wojciech Galuba Russell Howes Po-Yao Huang Shang-Wen Li Ishan Misra Michael Rabbat Vasu Sharma Gabriel Synnaeve Hu Xu Herv\u00e9 Jegou Julien Mairal Patrick Labatut Armand Joulin and Piotr Bojanowski. 2024. DINOv2: Learning Robust Visual Features without Supervision. arxiv:2304.07193\u00a0[cs.CV]","key":"e_1_3_2_1_30_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_31_1","DOI":"10.1016\/j.infbeh.2014.10.001"},{"key":"e_1_3_2_1_32_1","volume-title":"Language Models are Unsupervised Multitask Learners. OpenAI Blog","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. OpenAI Blog (2019). https:\/\/cdn.openai.com\/better-language-models\/language_models_are_unsupervised_multitask_learners.pdf"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_33_1","DOI":"10.1037\/cou0000407"},{"volume-title":"(ICEP-R)","author":"Reck Claudia","unstructured":"Claudia Reck, Daniela Noe, Francesca Cenciotti, Edward Tronick, and Karen\u00a0M. Weinberg. 2009. Infant and Caregiver Engagement Phases, German Revised Ed. (ICEP-R). Unknown Publisher, Unknown Location.","key":"e_1_3_2_1_34_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_35_1","DOI":"10.1371\/journal.pone.0194763"},{"key":"e_1_3_2_1_36_1","volume-title":"DISCOVER: A Data-driven Interactive System for Comprehensive Observation, Visualization, and ExploRation of Human Behaviour. arxiv:2407.13408\u00a0[cs.HC] https:\/\/arxiv.org\/abs\/2407.13408","author":"Schiller Dominik","year":"2024","unstructured":"Dominik Schiller, Tobias Hallmen, Daksitha\u00a0Withanage Don, Elisabeth Andr\u00e9, and Tobias Baur. 2024. DISCOVER: A Data-driven Interactive System for Comprehensive Observation, Visualization, and ExploRation of Human Behaviour. arxiv:2407.13408\u00a0[cs.HC] https:\/\/arxiv.org\/abs\/2407.13408"},{"key":"e_1_3_2_1_37_1","volume-title":"UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild. arXiv preprint arXiv:1212.0402","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro, Amir\u00a0Roshan Zamir, and Mubarak Shah. 2012. UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild. arXiv preprint arXiv:1212.0402 (2012)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_38_1","DOI":"10.1016\/S0002-7138(09)61752-0"},{"volume-title":"Before Speech: The Beginning of Human Communication","author":"Trevarthen Colwyn","unstructured":"Colwyn Trevarthen. 1979. Communication and Cooperation in Early Infancy: A Description of Primary Intersubjectivity. In Before Speech: The Beginning of Human Communication, Margaret Bullowa (Ed.). Cambridge University Press, London, 321\u2013347.","key":"e_1_3_2_1_39_1"},{"unstructured":"Colwyn Trevarthen and M. Bullowa. 1979. Communication and cooperation in early infancy: A description of primary intersubjectivity. Before Speech (Cambridge) (01 1979) 321\u2013347.","key":"e_1_3_2_1_40_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_41_1","DOI":"10.1016\/S0002-7138(09)62273-1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_42_1","DOI":"10.1016\/S0002-7138(09)62273-1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_43_1","DOI":"10.1109\/ACCESS.2017.2778011"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_44_1","DOI":"10.1037\/0012-1649.37.5.684"},{"doi-asserted-by":"crossref","unstructured":"Everett Waters. 1987. Attachment Q-Set (AQS).","key":"e_1_3_2_1_45_1","DOI":"10.1037\/t17841-000"},{"key":"e_1_3_2_1_46_1","first-page":"9","article-title":"Infant and Caregiver Engagement Phases (ICEP): A behavioral system for assessing mutual regulation in infant-caregiver dyads","volume":"20","author":"Weinberg K.","year":"1999","unstructured":"Marcia\u00a0K. Weinberg and Edward Tronick. 1999. Infant and Caregiver Engagement Phases (ICEP): A behavioral system for assessing mutual regulation in infant-caregiver dyads. Infant Mental Health Journal 20, 1 (1999), 9\u201326.","journal-title":"Infant Mental Health Journal"}],"event":{"acronym":"ICMI '24","name":"ICMI '24: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","location":"San Jose Costa Rica"},"container-title":["International Conference on Multimodel Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678957.3685704","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3678957.3685704","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:12Z","timestamp":1750295412000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678957.3685704"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,4]]},"references-count":46,"alternative-id":["10.1145\/3678957.3685704","10.1145\/3678957"],"URL":"https:\/\/doi.org\/10.1145\/3678957.3685704","relation":{},"subject":[],"published":{"date-parts":[[2024,11,4]]},"assertion":[{"value":"2024-11-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}