{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:00:11Z","timestamp":1777489211186,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":80,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612517","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:26:54Z","timestamp":1698391614000},"page":"6090-6100","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["Multi-label Emotion Analysis in Conversation via Multimodal Knowledge Distillation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5750-6860","authenticated-orcid":false,"given":"Sidharth","family":"Anand","sequence":"first","affiliation":[{"name":"BITS Pilani, Hyderabad, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3114-1044","authenticated-orcid":false,"given":"Naresh Kumar","family":"Devulapally","sequence":"additional","affiliation":[{"name":"State University of New York at Buffalo, Buffalo, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5393-0840","authenticated-orcid":false,"given":"Sreyasee Das","family":"Bhattacharjee","sequence":"additional","affiliation":[{"name":"State University of New York at Buffalo, Buffalo, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7901-8793","authenticated-orcid":false,"given":"Junsong","family":"Yuan","sequence":"additional","affiliation":[{"name":"State University of New York at Buffalo, Buffalo, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Transformer models for text-based emotion detection: a review of BERT-based approaches. Artificial Intelligence Review","author":"Acheampong Francisca Adoma","year":"2021","unstructured":"Francisca Adoma Acheampong, Henry Nunoo-Mensah, and Wenyu Chen. 2021. Transformer models for text-based emotion detection: a review of BERT-based approaches. Artificial Intelligence Review (2021), 1--41."},{"key":"e_1_3_2_1_2_1","volume-title":"Shapes of emotions: Multimodal emotion recognition in conversations via emotion shifts. arXiv preprint arXiv:2112.01938","author":"Agarwal Harsh","year":"2021","unstructured":"Harsh Agarwal, Keshav Bansal, Abhinav Joshi, and Ashutosh Modi. 2021. Shapes of emotions: Multimodal emotion recognition in conversations via emotion shifts. arXiv preprint arXiv:2112.01938 (2021)."},{"key":"e_1_3_2_1_3_1","volume-title":"Vatt: Transformers for multimodal self-supervised learning from raw video, audio and text. arXiv preprint arXiv:2104.11178","author":"Akbari Hassan","year":"2021","unstructured":"Hassan Akbari, Liangzhe Yuan, Rui Qian,Wei-Hong Chuang, Shih-Fu Chang, Yin Cui, and Boqing Gong. 2021. Vatt: Transformers for multimodal self-supervised learning from raw video, audio and text. arXiv preprint arXiv:2104.11178 (2021)."},{"key":"e_1_3_2_1_4_1","first-page":"25","article-title":"Self-supervised multimodal versatile networks","volume":"33","author":"Alayrac Jean-Baptiste","year":"2020","unstructured":"Jean-Baptiste Alayrac, Adria Recasens, Rosalia Schneider, Relja Arandjelovi\u0107, Jason Ramapuram, Jeffrey De Fauw, Lucas Smaira, Sander Dieleman, and Andrew Zisserman. 2020. Self-supervised multimodal versatile networks. Advances in Neural Information Processing Systems 33 (2020), 25--37.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","volume-title":"Population Aging: A Comparison Among Industrialized Countries: Populations around the world are growing older, but the trends are not cause for despair. Health affairs 19, 3","author":"Anderson Gerard F","year":"2000","unstructured":"Gerard F Anderson and Peter Sotir Hussey. 2000. Population Aging: A Comparison Among Industrialized Countries: Populations around the world are growing older, but the trends are not cause for despair. Health affairs 19, 3 (2000), 191--203."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"e_1_3_2_1_7_1","volume-title":"Multimodal Attentive Learning for Real-time Explainable Emotion Recognition in Conversations. In 2022 IEEE International Symposium on Circuits and Systems (ISCAS). IEEE, 1210--1214","author":"Arumugam Balaji","year":"2022","unstructured":"Balaji Arumugam, Sreyasee Das Bhattacharjee, and Junsong Yuan. 2022. Multimodal Attentive Learning for Real-time Explainable Emotion Recognition in Conversations. In 2022 IEEE International Symposium on Circuits and Systems (ISCAS). IEEE, 1210--1214."},{"key":"e_1_3_2_1_8_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer normalization. arXiv preprint arXiv:1607.06450 (2016)."},{"key":"e_1_3_2_1_9_1","volume-title":"Multimodal machine learning: A survey and taxonomy","author":"Tadas","year":"2018","unstructured":"Tadas Baltru?aitis, Chaitanya Ahuja, and Louis-Philippe Morency. 2018. Multimodal machine learning: A survey and taxonomy. IEEE transactions on pattern analysis and machine intelligence 41, 2 (2018), 423--443."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3210183"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACII55700.2022.9953863"},{"key":"e_1_3_2_1_12_1","volume-title":"EmoReact: A Multimodal Approach and Dataset for Recognizing Emotional Responses in Children. International Conference on Multimodal Interfaces( ICMI)","author":"Charles","year":"2016","unstructured":"Charles E. Hughes Louis-Philippe Morency Behnaz Nojavanasghari, Tadas Baltrusaitis. 2016. EmoReact: A Multimodal Approach and Dataset for Recognizing Emotional Responses in Children. International Conference on Multimodal Interfaces( ICMI) (2016)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Daniel E Berlyne. 1960. Conflict arousal and curiosity. (1960).","DOI":"10.1037\/11164-000"},{"key":"e_1_3_2_1_14_1","volume-title":"Pattern recognition and machine learning","author":"Bishop Christopher M","unstructured":"Christopher M Bishop and Nasser M Nasrabadi. 2006. Pattern recognition and machine learning. Vol. 4. Springer."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.189"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00511"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n19--1423"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0062802"},{"key":"e_1_3_2_1_19_1","volume-title":"Jacqueline CK Lam, and Victor OK Li","author":"Fan Yingruo","year":"2021","unstructured":"Yingruo Fan, Jacqueline CK Lam, and Victor OK Li. 2021. Demographic effects on facial emotion expression: an interdisciplinary investigation of the facial action units of happiness. Scientific reports 11, 1 (2021), 1--11."},{"key":"e_1_3_2_1_20_1","volume-title":"Facial age affects emotional expression decoding. Frontiers in psychology 5","author":"F\u00f6lster Mara","year":"2014","unstructured":"Mara F\u00f6lster, Ursula Hess, and Katja Werheid. 2014. Facial age affects emotional expression decoding. Frontiers in psychology 5 (2014), 30."},{"key":"e_1_3_2_1_21_1","volume-title":"Robert E Kleck, and Ursula Hess.","author":"Freudenberg Maxi","year":"2015","unstructured":"Maxi Freudenberg, Reginald B Adams Jr, Robert E Kleck, and Ursula Hess. 2015. Through a glass darkly: facial wrinkles affect our processing of emotion in the elderly. Frontiers in psychology 6 (2015), 1476."},{"key":"e_1_3_2_1_22_1","volume-title":"COSMIC: COmmonSense knowledge for eMotion Identification in Conversations. arXiv:2010.02795 [cs.CL]","author":"Ghosal Deepanway","year":"2020","unstructured":"Deepanway Ghosal, Navonil Majumder, Alexander Gelbukh, Rada Mihalcea, and Soujanya Poria. 2020. COSMIC: COmmonSense knowledge for eMotion Identification in Conversations. arXiv:2010.02795 [cs.CL]"},{"key":"e_1_3_2_1_23_1","volume-title":"Dialoguegcn: A graph convolutional neural network for emotion recognition in conversation. arXiv preprint arXiv:1908.11540","author":"Ghosal Deepanway","year":"2019","unstructured":"Deepanway Ghosal, Navonil Majumder, Soujanya Poria, Niyati Chhaya, and Alexander Gelbukh. 2019. Dialoguegcn: A graph convolutional neural network for emotion recognition in conversation. arXiv preprint arXiv:1908.11540 (2019)."},{"key":"e_1_3_2_1_24_1","volume-title":"Ast: Audio spectrogram transformer. arXiv preprint arXiv:2104.01778","author":"Gong Yuan","year":"2021","unstructured":"Yuan Gong, Yu-An Chung, and James Glass. 2021. Ast: Audio spectrogram transformer. arXiv preprint arXiv:2104.01778 (2021)."},{"key":"e_1_3_2_1_25_1","first-page":"622","article-title":"Age deficits in facial affect recognition: The influence of dynamic cues","volume":"72","author":"Grainger Sarah A","year":"2017","unstructured":"Sarah A Grainger, Julie D Henry, Louise H Phillips, Eric J Vanman, and Roy Allen. 2017. Age deficits in facial affect recognition: The influence of dynamic cues. Journals of Gerontology Series B: Psychological Sciences and Social Sciences 72, 4 (2017), 622--632.","journal-title":"Journals of Gerontology Series B: Psychological Sciences and Social Sciences"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2021.620768"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.723"},{"key":"e_1_3_2_1_28_1","volume-title":"Improving multimodal fusion with hierarchical mutual information maximization for multimodal sentiment analysis. arXiv preprint arXiv:2109.00412","author":"Han Wei","year":"2021","unstructured":"Wei Han, Hui Chen, and Soujanya Poria. 2021. Improving multimodal fusion with hierarchical mutual information maximization for multimodal sentiment analysis. arXiv preprint arXiv:2109.00412 (2021)."},{"key":"e_1_3_2_1_29_1","unstructured":"Susan Harter and Nancy Rumbaugh Whitesell. 1989. Developmental changes in children's understanding of single multiple and blended emotion concepts. (1989)."},{"key":"e_1_3_2_1_30_1","volume-title":"audioLIME: Listenable Explanations Using Source Separation. CoRR abs\/2008.00582","author":"Haunschmid Verena","year":"2020","unstructured":"Verena Haunschmid, Ethan Manilow, and Gerhard Widmer. 2020. audioLIME: Listenable Explanations Using Source Separation. CoRR abs\/2008.00582 (2020). arXiv:2008.00582 https:\/\/arxiv.org\/abs\/2008.00582"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1280"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_2_1_33_1","volume-title":"Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415","author":"Hendrycks Dan","year":"2016","unstructured":"Dan Hendrycks and Kevin Gimpel. 2016. Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415 (2016)."},{"key":"e_1_3_2_1_34_1","unstructured":"K.J.T. Hetterscheid. 2020. Detecting agitated speech: A neural network approach. http:\/\/essay.utwente.nl\/82014\/"},{"key":"e_1_3_2_1_35_1","volume-title":"Dialoguecrn: Contextual reasoning networks for emotion recognition in conversations. arXiv preprint arXiv:2106.01978","author":"Hu Dou","year":"2021","unstructured":"Dou Hu, Lingwei Wei, and Xiaoyong Huai. 2021. Dialoguecrn: Contextual reasoning networks for emotion recognition in conversations. arXiv preprint arXiv:2106.01978 (2021)."},{"key":"e_1_3_2_1_36_1","volume-title":"UniMSE: Towards Unified Multimodal Sentiment Analysis and Emotion Recognition. arXiv preprint arXiv:2211.11256","author":"Hu Guimin","year":"2022","unstructured":"Guimin Hu, Ting-En Lin, Yi Zhao, Guangming Lu, Yuchuan Wu, and Yongbin Li. 2022. UniMSE: Towards Unified Multimodal Sentiment Analysis and Emotion Recognition. arXiv preprint arXiv:2211.11256 (2022)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2023.3244041"},{"key":"e_1_3_2_1_38_1","volume-title":"Expression Recognition Across Age. In 2021 16th IEEE International Conference on Automatic Face and Gesture Recognition (FG","author":"Jannat Sk Rahatul","year":"2021","unstructured":"Sk Rahatul Jannat and Shaun Canavan. 2021. Expression Recognition Across Age. In 2021 16th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2021). IEEE, 1--5."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG52635.2021.9667062"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-78292-4_20"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1111\/spc3.12108"},{"key":"e_1_3_2_1_42_1","unstructured":"Michael Leben. 2012. Email Classification with Contextual Information. Ph.D. Dissertation. Hasso-Plattner-Institute."},{"key":"e_1_3_2_1_43_1","volume-title":"EmoCaps: Emotion Capsule based Model for Conversational Emotion Recognition. arXiv preprint arXiv:2203.13504","author":"Li Zaijing","year":"2022","unstructured":"Zaijing Li, Fengxiao Tang, Ming Zhao, and Yusen Zhu. 2022. EmoCaps: Emotion Capsule based Model for Conversational Emotion Recognition. arXiv preprint arXiv:2203.13504 (2022)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3049898"},{"key":"e_1_3_2_1_45_1","volume-title":"Paul Pu Liang, Amir Zadeh, and Louis-Philippe Morency.","author":"Liu Zhun","year":"2018","unstructured":"Zhun Liu, Ying Shen, Varun Bharadhwaj Lakshminarasimhan, Paul Pu Liang, Amir Zadeh, and Louis-Philippe Morency. 2018. Efficient low-rank multimodal fusion with modality-specific factors. arXiv preprint arXiv:1806.00064 (2018)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340555.3353747"},{"key":"e_1_3_2_1_47_1","volume-title":"Emotion experience and expression across the adult life span: insights from a multimodal assessment study. Psychology and aging 21, 2","author":"Magai Carol","year":"2006","unstructured":"Carol Magai, Nathan S Consedine, Yulia S Krivoshekova, Elizabeth Kudadjie-Gyamfi, and Renee McPherson. 2006. Emotion experience and expression across the adult life span: insights from a multimodal assessment study. Psychology and aging 21, 2 (2006), 303."},{"key":"e_1_3_2_1_48_1","volume-title":"Multimodal sentiment analysis using hierarchical fusion with context modeling. Knowledge-based systems 161","author":"Majumder Navonil","year":"2018","unstructured":"Navonil Majumder, Devamanyu Hazarika, Alexander Gelbukh, Erik Cambria, and Soujanya Poria. 2018. Multimodal sentiment analysis using hierarchical fusion with context modeling. Knowledge-based systems 161 (2018), 124--133."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Navonil Majumder Soujanya Poria Devamanyu Hazarika Rada Mihalcea Alexander Gelbukh and Erik Cambria. 2019. DialogueRNN: An Attentive RNN for Emotion Detection in Conversations. arXiv:1811.00405 [cs.CL]","DOI":"10.1609\/aaai.v33i01.33016818"},{"key":"e_1_3_2_1_50_1","volume-title":"Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. 2013. Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)."},{"key":"e_1_3_2_1_51_1","first-page":"15682","article-title":"Revisiting the calibration of modern neural networks","volume":"34","author":"Minderer Matthias","year":"2021","unstructured":"Matthias Minderer, Josip Djolonga, Rob Romijnders, Frances Hubis, Xiaohua Zhai, Neil Houlsby, Dustin Tran, and Mario Lucic. 2021. Revisiting the calibration of modern neural networks. Advances in Neural Information Processing Systems 34 (2021), 15682--15694.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i02.5492"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576050.3576108"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/TENSYMP46218.2019.8971023"},{"key":"e_1_3_2_1_55_1","volume-title":"Zaher Al Aghbari","author":"Naveed Ahmed Shini Girija","year":"2023","unstructured":"Shini Girija Naveed Ahmed, Zaher Al Aghbari. 2023. A systematic survey on multimodal emotion recognition using learning algorithms. Intelligent Systems with Applications (2023)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/2993148.2993168"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2993148.2993176"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2016.0055"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"e_1_3_2_1_60_1","volume-title":"Explaining the Predictions of Any Classifier. CoRR abs\/1602.04938","author":"Ribeiro Marco T\u00falio","year":"2016","unstructured":"Marco T\u00falio Ribeiro, Sameer Singh, and Carlos Guestrin. 2016. \"Why Should I Trust You?\": Explaining the Predictions of Any Classifier. CoRR abs\/1602.04938 (2016). arXiv:1602.04938 http:\/\/arxiv.org\/abs\/1602.04938"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.31.6.063032"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6431"},{"key":"e_1_3_2_1_63_1","volume-title":"Proceedings of the conference. Association for Computational Linguistics. Meeting","volume":"2019","author":"Hubert Tsai Yao-Hung","year":"2019","unstructured":"Yao-Hung Hubert Tsai, Shaojie Bai, Paul Pu Liang, J Zico Kolter, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2019. Multimodal transformer for unaligned multimodal language sequences. In Proceedings of the conference. Association for Computational Linguistics. Meeting, Vol. 2019. NIH Public Access, 6558."},{"key":"e_1_3_2_1_64_1","volume-title":"Amir Zadeh, Louis-Philippe Morency, and Ruslan Salakhutdinov.","author":"Hubert Tsai Yao-Hung","year":"2018","unstructured":"Yao-Hung Hubert Tsai, Paul Pu Liang, Amir Zadeh, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2018. Learning factorized multimodal representations. arXiv preprint arXiv:1806.06176 (2018)."},{"key":"e_1_3_2_1_65_1","volume-title":"Sentiment-Emotion-and Context-guided Knowledge Selection Framework for Emotion Recognition in Conversations","author":"Tu Geng","year":"2022","unstructured":"Geng Tu, Bin Liang, Dazhi Jiang, and Ruifeng Xu. 2022. Sentiment-Emotion-and Context-guided Knowledge Selection Framework for Emotion Recognition in Conversations. IEEE Transactions on Affective Computing (2022)."},{"key":"e_1_3_2_1_66_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572915"},{"key":"e_1_3_2_1_68_1","volume-title":"Proc. Int. Symp. Signal Process. Biomed. Eng Informat. 549--553","author":"Wang KX","year":"2014","unstructured":"KX Wang, QL Zhang, and SY Liao. 2014. A database of elderly emotional speech. In Proc. Int. Symp. Signal Process. Biomed. Eng Informat. 549--553."},{"key":"e_1_3_2_1_69_1","volume-title":"2016 IEEE\/ACIS 15th International Conference on Computer and Information Science (ICIS). IEEE, 1--6.","author":"Zhu ZongBao","year":"2016","unstructured":"KunxiaWang, ZongBao Zhu, ShidongWang, Xiao Sun, and Lian Li. 2016. A database for emotional interactions of the elderly. In 2016 IEEE\/ACIS 15th International Conference on Computer and Information Science (ICIS). IEEE, 1--6."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/T-AFFC.2010.16"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1044"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-65414-6_35"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1518"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"e_1_3_2_1_75_1","volume-title":"Mosi: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv preprint arXiv:1606.06259","author":"Zadeh Amir","year":"2016","unstructured":"Amir Zadeh, Rowan Zellers, Eli Pincus, and Louis-Philippe Morency. 2016. Mosi: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv preprint arXiv:1606.06259 (2016)."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1208"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.291"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.291"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17686"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17686"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612517","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612517","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:56:38Z","timestamp":1755820598000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612517"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":80,"alternative-id":["10.1145\/3581783.3612517","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612517","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}