{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T09:06:14Z","timestamp":1769504774149,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","funder":[{"DOI":"10.13039\/100006754","name":"Army Research Laboratory","doi-asserted-by":"publisher","award":["W911NF-19-2-0135"],"award-info":[{"award-number":["W911NF-19-2-0135"]}],"id":[{"id":"10.13039\/100006754","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,13]]},"DOI":"10.1145\/3716553.3750758","type":"proceedings-article","created":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T13:13:16Z","timestamp":1760188396000},"page":"284-293","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Beyond Utterance: Understanding Group Problem Solving through Discussion Sequences"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-2305-6651","authenticated-orcid":false,"given":"Zhuoxu","family":"Duan","sequence":"first","affiliation":[{"name":"Dept. of ECSE, Rensselaer Polytechnic Institute, Troy, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6823-0388","authenticated-orcid":false,"given":"Zhengye","family":"Yang","sequence":"additional","affiliation":[{"name":"Dept. of ECSE, Rensselaer Polytechnic Institute, Troy, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4155-8815","authenticated-orcid":false,"given":"Brooke","family":"Foucault Welles","sequence":"additional","affiliation":[{"name":"Communication Studies, Northeastern University, Boston, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5064-7775","authenticated-orcid":false,"given":"Richard","family":"J. Radke","sequence":"additional","affiliation":[{"name":"Dept. of ECSE, Rensselaer Polytechnic Institute, Troy, NY, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191640"},{"key":"e_1_3_3_2_3_2","first-page":"I\u20131061","volume-title":"Proceedings.(ICASSP\u201905). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005.","volume":"1","author":"Ang Jeremy","year":"2005","unstructured":"Jeremy Ang, Yang Liu, and Elizabeth Shriberg. 2005. Automatic dialog act segmentation and classification in multiparty meetings. In Proceedings.(ICASSP\u201905). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005. , Vol.\u00a01. IEEE, I\u20131061."},{"key":"e_1_3_3_2_4_2","unstructured":"John Arevalo Thamar Solorio Manuel Montes-y G\u00f3mez and Fabio\u00a0A Gonz\u00e1lez. 2017. Gated multimodal units for information fusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1702.01992 (2017)."},{"key":"e_1_3_3_2_5_2","unstructured":"Jimmy\u00a0Lei Ba Jamie\u00a0Ryan Kiros and Geoffrey\u00a0E Hinton. 2016. Layer normalization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1607.06450 (2016)."},{"key":"e_1_3_3_2_6_2","unstructured":"Lo\u00efc Barrault Yu-An Chung Mariano\u00a0Coria Meglioli David Dale Ning Dong Mark Duppenthaler Paul-Ambroise Duquenne Brian Ellis Hady Elsahar Justin Haaheim et\u00a0al. 2023. Seamless: Multilingual Expressive and Streaming Speech Translation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.05187 (2023)."},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/1891903.1891910"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Jin\u00a0Hyun Cheong Eshin Jolly Tiankang Xie Sophie Byrne Matthew Kenney and Luke\u00a0J Chang. 2023. Py-feat: Python facial expression analysis toolbox. Affective Science 4 4 (2023) 781\u2013796.","DOI":"10.1007\/s42761-023-00191-4"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1037\/10096-006"},{"key":"e_1_3_3_2_10_2","first-page":"720","volume-title":"Proceedings of the European conference on computer vision (ECCV)","author":"Damen Dima","year":"2018","unstructured":"Dima Damen, Hazel Doughty, Giovanni\u00a0Maria Farinella, Sanja Fidler, Antonino Furnari, Evangelos Kazakos, Davide Moltisanti, Jonathan Munro, Toby Perrett, Will Price, et\u00a0al. 2018. Scaling egocentric vision: The epic-kitchens dataset. In Proceedings of the European conference on computer vision (ECCV). 720\u2013736."},{"key":"e_1_3_3_2_11_2","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers)","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171\u20134186."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Pierre Dillenbourg and David Traum. 2006. Sharing solutions: Persistence and grounding in multimodal collaborative problem solving. The Journal of the Learning Sciences 15 1 (2006) 121\u2013151.","DOI":"10.1207\/s15327809jls1501_9"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Hao-Shu Fang Jiefeng Li Hongyang Tang Chao Xu Haoyi Zhu Yuliang Xiu Yong-Lu Li and Cewu Lu. 2022. AlphaPose: Whole-Body Regional Multi-Person Pose Estimation and Tracking in Real-Time. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022).","DOI":"10.1109\/TPAMI.2022.3222784"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Mauajama Firdaus Hitesh Golchha Asif Ekbal and Pushpak Bhattacharyya. 2021. A deep multi-task model for dialogue act classification intent detection and slot filling. Cognitive Computation 13 (2021) 626\u2013645.","DOI":"10.1007\/s12559-020-09718-4"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Luke Gessler Shabnam Behzad Yang\u00a0Janet Liu Siyao Peng Yilun Zhu and Amir Zeldes. 2021. DisCoDisCo at the DISRPT2021 shared task: A system for discourse segmentation classification and connective detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.09777 (2021).","DOI":"10.18653\/v1\/2021.disrpt-1.6"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Qinyu Han Zhihao Yang Hongfei Lin and Tian Qin. 2024. Let Topic Flow: A Unified Topic-Guided Segment-Wise Dialogue Summarization Framework. IEEE\/ACM Transactions on Audio Speech and Language Processing (2024).","DOI":"10.1109\/TASLP.2024.3374112"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_3_2_19_2","first-page":"252","volume-title":"Computer Supported Collaborative Learning 2005","author":"Kapur Manu","year":"2017","unstructured":"Manu Kapur, John Voiklis, and Charles\u00a0K Kinzer. 2017. Problem solving as a complex, evolutionary activity: A methodological framework for analyzing problem-solving processes in a computersupported collaborative environment. In Computer Supported Collaborative Learning 2005. Routledge, 252\u2013261."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3678957.3685759"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Bolin Lai Hongxin Zhang Miao Liu Aryan Pariani Fiona Ryan Wenqi Jia Shirley\u00a0Anugrah Hayati James Rehg and Diyi Yang. 2023. Werewolf among us: Multimodal resources for modeling persuasion behaviors in social deduction games. Association for Computational Linguistics: ACL 2023 (2023).","DOI":"10.18653\/v1\/2023.findings-acl.411"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Wolf Langewitz Matthias N\u00fcbling and Heidemarie Weber. 2003. A theory-based approach to analysing conversation sequences. Epidemiology and Psychiatric Sciences 12 2 (2003) 103\u2013108.","DOI":"10.1017\/S1121189X00006163"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3678957.3685742"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01382"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"L McCowan Daniel Gatica-Perez Samy Bengio Guillaume Lathoud Mark Barnard and Dong Zhang. 2005. Automatic analysis of multimodal group actions in meetings. IEEE transactions on pattern analysis and machine intelligence 27 3 (2005) 305\u2013317.","DOI":"10.1109\/TPAMI.2005.49"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3689004"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613851"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3551589"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3479219"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3242973"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376450"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Chiara Plizzari Gabriele Goletto Antonino Furnari Siddhant Bansal Francesco Ragusa Giovanni\u00a0Maria Farinella Dima Damen and Tatiana Tommasi. 2024. An outlook into the future of egocentric vision. International Journal of Computer Vision 132 11 (2024) 4880\u20134936.","DOI":"10.1007\/s11263-024-02095-7"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i15.17616"},{"key":"e_1_3_3_2_34_2","first-page":"28492","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International conference on machine learning. PMLR, 28492\u201328518."},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2003.1202750"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0L Russell. 1988. A new classification scheme for studies of verbal behavior in psychotherapy. Psychotherapy: Theory Research Practice Training 25 1 (1988) 51.","DOI":"10.1037\/h0085323"},{"key":"e_1_3_3_2_37_2","unstructured":"Robert\u00a0L Russell and Dietmar Czogalik. 1989. Strategies for analyzing conversations: Frequencies sequences or rules. Journal of Social Behavior and Personality 4 3 (1989) 221."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","unstructured":"R.\u00a0L. Russell and C. Staszewski. 1988. The Unit Problem: Some Systematic Distinctions and Critical Dilemmas for Psychotherapy Process Research. Psychotherapy: Theory Research Practice Training 25 2 (1988) 191\u2013200. 10.1037\/h0085333","DOI":"10.1037\/h0085333"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/2557500.2557507"},{"key":"e_1_3_3_2_40_2","unstructured":"Saba Sturua Isabelle Mohr Mohammad\u00a0Kalim Akram Michael G\u00fcnther Bo Wang Markus Krimmel Feng Wang Georgios Mastrapas Andreas Koukounas Andreas Koukounas Nan Wang and Han Xiao. 2024. jina-embeddings-v3: Multilingual Embeddings With Task LoRA. arxiv:https:\/\/arXiv.org\/abs\/2409.10173\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2409.10173"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02546"},{"key":"e_1_3_3_2_42_2","unstructured":"Silero Team. 2024. Silero VAD: pre-trained enterprise-grade Voice Activity Detector (VAD) Number Detector and Language Classifier. https:\/\/github.com\/snakers4\/silero-vad."},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"crossref","unstructured":"Elahe Vahdani and Yingli Tian. 2022. Deep learning-based action detection in untrimmed videos: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence 45 4 (2022) 4302\u20134320.","DOI":"10.1109\/TPAMI.2022.3193611"},{"key":"e_1_3_3_2_44_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.940902"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531817"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01254"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"crossref","unstructured":"Linzi Xing and Giuseppe Carenini. 2021. Improving unsupervised dialogue topic segmentation with utterance-pair coherence scoring. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.06719 (2021).","DOI":"10.18653\/v1\/2021.sigdial-1.18"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093642"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3340555.3353761"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"crossref","unstructured":"Lingyu Zhang and Richard\u00a0J Radke. 2020. A multi-stream recurrent neural network for social role detection in multiparty interactions. IEEE Journal of Selected Topics in Signal Processing 14 3 (2020) 554\u2013567.","DOI":"10.1109\/JSTSP.2020.2992394"}],"event":{"name":"ICMI '25: International Conference on Multimodal Interaction","location":"Canberra Australia","acronym":"ICMI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 27th International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3716553.3750758","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T22:27:47Z","timestamp":1769466467000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3716553.3750758"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":51,"alternative-id":["10.1145\/3716553.3750758","10.1145\/3716553"],"URL":"https:\/\/doi.org\/10.1145\/3716553.3750758","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}