{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:01:06Z","timestamp":1774944066628,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T00:00:00Z","timestamp":1696809600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,9]]},"DOI":"10.1145\/3610661.3617166","type":"proceedings-article","created":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T16:51:22Z","timestamp":1696870282000},"page":"71-75","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Multimodal Prediction of User's Performance in High-Stress Dialogue Interactions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-2439-9066","authenticated-orcid":false,"given":"Setareh","family":"Nasihati Gilani","sequence":"first","affiliation":[{"name":"Institute for Creative Technologies, University of Southern California, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5849-1987","authenticated-orcid":false,"given":"Kimberly","family":"Pollard","sequence":"additional","affiliation":[{"name":"DEVCOM Army Research Laboratory, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3473-9586","authenticated-orcid":false,"given":"David","family":"Traum","sequence":"additional","affiliation":[{"name":"Institute for Creative Technologies, University of Southern California, United 
States"}]}],"member":"320","published-online":{"date-parts":[[2023,10,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3279810.3279849"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2018.00019"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuroimage.2006.11.048"},{"key":"e_1_3_2_1_4_1","volume-title":"Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications II, Vol.\u00a011413","author":"Chaffey Patricia","unstructured":"Patricia Chaffey, Ron Artstein, Kallirroi Georgila, Kimberly\u00a0A Pollard, Setareh\u00a0Nasihati Gilani, David\u00a0M Krum, David Nelson, Kevin Huynh, Alesia Gainer, Seyed\u00a0Hossein Alavi, 2020. Human swarm interaction using plays, audibles, and a virtual spokesperson. In Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications II, Vol.\u00a011413. SPIE, 272\u2013285."},{"key":"e_1_3_2_1_5_1","volume-title":"Workshop on Human Language Technologies in Crisis and Emergency Management.","author":"Chaffey Patricia","year":"2019","unstructured":"Patricia Chaffey, Ron Artstein, Kallirroi Georgila, Kimberly\u00a0A Pollard, Setareh\u00a0Nasihati Gilani, David\u00a0M Krum, David Nelson, Kevin Huynh, Alesia Gainer, Seyed\u00a0Hossein Alavi, Rhys Yahata, and David Traum. 2019. Developing a virtual reality wildfire simulation to analyze human communication and interaction with a robotic swarm during emergencies. In Workshop on Human Language Technologies in Crisis and Emergency Management."},{"key":"e_1_3_2_1_6_1","unstructured":"Fran\u00e7ois Chollet 2015. Keras. https:\/\/keras.io."},{"key":"e_1_3_2_1_7_1","volume-title":"Multimodal utterance-level affect analysis using visual, audio and text features. arXiv preprint arXiv:1805.00625","author":"Deng Didan","year":"2018","unstructured":"Didan Deng, Yuqian Zhou, Jimin Pi, and Bertram\u00a0E Shi. 2018. 
Multimodal utterance-level affect analysis using visual, audio and text features. arXiv preprint arXiv:1805.00625 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"Constants across cultures in the face and emotion.Journal of personality and social psychology 17, 2","author":"Ekman Paul","year":"1971","unstructured":"Paul Ekman and Wallace\u00a0V Friesen. 1971. Constants across cultures in the face and emotion.Journal of personality and social psychology 17, 2 (1971), 124."},{"key":"e_1_3_2_1_9_1","volume-title":"Facial action coding system. Environmental Psychology & Nonverbal Behavior","author":"Ekman Paul","year":"1978","unstructured":"Paul Ekman and Wallace\u00a0V Friesen. 1978. Facial action coding system. Environmental Psychology & Nonverbal Behavior (1978)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"e_1_3_2_1_11_1","volume-title":"cognitive load and learning outcomes during simulation training. Medical education 46, 11","author":"Fraser Kristin","year":"2012","unstructured":"Kristin Fraser, Irene Ma, Elise Teteris, Heather Baxter, Bruce Wright, and Kevin McLaughlin. 2012. Emotion, cognitive load and learning outcomes during simulation training. Medical education 46, 11 (2012), 1055\u20131062."},{"key":"e_1_3_2_1_12_1","unstructured":"Jochen Hartmann. 2022. Emotion English DistilRoBERTa-base. https:\/\/huggingface.co\/j-hartmann\/emotion-english-distilroberta-base\/."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Larry\u00a0E Humes and Shari\u00a0S Floyd. 2005. Measures of working memory sequence learning and speech recognition in the elderly. 
(2005).","DOI":"10.1044\/1092-4388(2005\/016)"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3556631"},{"key":"e_1_3_2_1_17_1","volume-title":"End-to-end neural coreference resolution. arXiv preprint arXiv:1707.07045","author":"Lee Kenton","year":"2017","unstructured":"Kenton Lee, Luheng He, Mike Lewis, and Luke Zettlemoyer. 2017. End-to-end neural coreference resolution. arXiv preprint arXiv:1707.07045 (2017)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-63031-7_26"},{"key":"e_1_3_2_1_19_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_20_1","volume-title":"Scalevlad: Improving multimodal sentiment analysis via multi-scale fusion of locally descriptors. arXiv preprint arXiv:2112.01368","author":"Luo Huaishao","year":"2021","unstructured":"Huaishao Luo, Lei Ji, Yanyong Huang, Bin Wang, Shenggong Ji, and Tianrui Li. 2021. Scalevlad: Improving multimodal sentiment analysis via multi-scale fusion of locally descriptors. arXiv preprint arXiv:2112.01368 (2021)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1037\/1528-3542.7.2.447"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3556607"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.895976"},{"key":"e_1_3_2_1_24_1","volume-title":"Robust speech recognition via large-scale weak supervision. arXiv preprint arXiv:2212.04356","author":"Radford Alec","year":"2022","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2022. 
Robust speech recognition via large-scale weak supervision. arXiv preprint arXiv:2212.04356 (2022)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3191801.3191816"},{"key":"e_1_3_2_1_26_1","volume-title":"Multimodal emotion recognition using deep learning architectures. In 2016 IEEE winter conference on applications of computer vision (WACV)","author":"Ranganathan Hiranmayi","unstructured":"Hiranmayi Ranganathan, Shayok Chakraborty, and Sethuraman Panchanathan. 2016. Multimodal emotion recognition using deep learning architectures. In 2016 IEEE winter conference on applications of computer vision (WACV). IEEE, 1\u20139."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijpsycho.2010.11.001"},{"key":"e_1_3_2_1_28_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. ArXiv abs\/1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. ArXiv abs\/1910.01108 (2019)."},{"key":"e_1_3_2_1_29_1","volume-title":"wav2vec: Unsupervised pre-training for speech recognition. arXiv preprint arXiv:1904.05862","author":"Schneider Steffen","year":"2019","unstructured":"Steffen Schneider, Alexei Baevski, Ronan Collobert, and Michael Auli. 2019. wav2vec: Unsupervised pre-training for speech recognition. arXiv preprint arXiv:1904.05862 (2019)."},{"key":"e_1_3_2_1_30_1","volume-title":"Facial mimicry in its social setting. Frontiers in psychology 6","author":"Seibt Beate","year":"2015","unstructured":"Beate Seibt, Andreas M\u00fchlberger, Katja\u00a0U Likowski, and Peter Weyers. 2015. Facial mimicry in its social setting. 
Frontiers in psychology 6 (2015), 1122."},{"key":"e_1_3_2_1_31_1","volume-title":"Multimodal emotion recognition in response to videos","author":"Soleymani Mohammad","year":"2011","unstructured":"Mohammad Soleymani, Maja Pantic, and Thierry Pun. 2011. Multimodal emotion recognition in response to videos. IEEE transactions on affective computing 3, 2 (2011), 211\u2013223."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340555.3353750"},{"key":"e_1_3_2_1_33_1","volume-title":"Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. arXiv preprint arXiv:2203.12602","author":"Tong Zhan","year":"2022","unstructured":"Zhan Tong, Yibing Song, Jue Wang, and Limin Wang. 2022. Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. arXiv preprint arXiv:2203.12602 (2022)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.repl4nlp-1.20"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2764438"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuroimage.2009.01.016"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_38_1","volume-title":"Improving dialog systems for negotiation with personality modeling. arXiv preprint arXiv:2010.09954","author":"Yang Runzhe","year":"2020","unstructured":"Runzhe Yang, Jingxiao Chen, and Karthik Narasimhan. 2020. Improving dialog systems for negotiation with personality modeling. 
arXiv preprint arXiv:2010.09954 (2020)."}],"event":{"name":"ICMI '23: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","location":"Paris France","acronym":"ICMI '23","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610661.3617166","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3610661.3617166","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:34:20Z","timestamp":1755891260000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610661.3617166"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,9]]},"references-count":38,"alternative-id":["10.1145\/3610661.3617166","10.1145\/3610661"],"URL":"https:\/\/doi.org\/10.1145\/3610661.3617166","relation":{},"subject":[],"published":{"date-parts":[[2023,10,9]]},"assertion":[{"value":"2023-10-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}