{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,4]],"date-time":"2026-07-04T08:16:48Z","timestamp":1783153008950,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["62272188"],"award-info":[{"award-number":["62272188"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3774904.3792393","type":"proceedings-article","created":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T12:38:33Z","timestamp":1777293513000},"page":"4635-4646","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Beyond Words: Enhancing Desire, Emotion, and Sentiment Recognition with Non-Verbal Cues"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7820-3498","authenticated-orcid":false,"given":"Wei","family":"Chen","sequence":"first","affiliation":[{"name":"College of Informatics, Huazhong Agricultural University, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8747-1790","authenticated-orcid":false,"given":"Tongguan","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Informatics, Huazhong Agricultural University, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0949-2987","authenticated-orcid":false,"given":"Feiyue","family":"Xue","sequence":"additional","affiliation":[{"name":"College of Informatics, Huazhong Agricultural University, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3236-6838","authenticated-orcid":false,"given":"Junkai","family":"Li","sequence":"additional","affiliation":[{"name":"College of 
Informatics, Huazhong Agricultural University, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9920-1186","authenticated-orcid":false,"given":"Hui","family":"Liu","sequence":"additional","affiliation":[{"name":"College of Informatics, Huazhong Agricultural University, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6638-5009","authenticated-orcid":false,"given":"Ying","family":"Sha","sequence":"additional","affiliation":[{"name":"College of Informatics, Huazhong Agricultural University, Wuhan, China, Engineering Research Center of Intelligent Technology for Agriculture, Wuhan, China, Hubei Engineering Technology Research Center of Agricultural Big Data, Wuhan, China, and Key Laboratory of Smart Farming for Agricultural Animals, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,4,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2025.129376"},{"key":"e_1_3_2_1_2_1","volume-title":"The common neural bases between sexual desire and love: a multilevel kernel density fMRI analysis. The journal of sexual medicine","author":"Cacioppo Stephanie","year":"2012","unstructured":"Stephanie Cacioppo, Francesco Bianchi-Demicheli, Chris Frum, James G Pfaus, and James W Lewis. 2012. The common neural bases between sexual desire and love: a multilevel kernel density fMRI analysis. The journal of sexual medicine, Vol. 9, 4 (2012), 1048-1054."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.aacl-main.31"},{"key":"e_1_3_2_1_4_1","volume-title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. 
ArXiv","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2020. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. ArXiv, Vol. abs\/2010.11929 (2020). https:\/\/api.semanticscholar.org\/CorpusID:225039882"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/S14-2026"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3003648"},{"key":"e_1_3_2_1_7_1","first-page":"15979","volume-title":"Masked Autoencoders Are Scalable Vision Learners. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"He Kaiming","year":"2021","unstructured":"Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Doll\u00e1r, and Ross B. Girshick. 2021. Masked Autoencoders Are Scalable Vision Learners. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021), 15979-15988. https:\/\/api.semanticscholar.org\/CorpusID:243985980"},{"key":"e_1_3_2_1_8_1","first-page":"770","volume-title":"Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"He Kaiming","year":"2015","unstructured":"Kaiming He, X. Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015), 770-778. https:\/\/api.semanticscholar.org\/CorpusID:206594692"},{"key":"e_1_3_2_1_9_1","volume-title":"The psychology of desire","author":"Hofmann Wilhelm","unstructured":"Wilhelm Hofmann and Loran F Nordgren. 2015. The psychology of desire. 
Guilford Publications."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2800835.2800910"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219853"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3388861"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","unstructured":"Gabriel Ilharco Mitchell Wortsman Ross Wightman Cade Gordon Nicholas Carlini Rohan Taori Achal Dave Vaishaal Shankar Hongseok Namkoong John Miller Hannaneh Hajishirzi Ali Farhadi and Ludwig Schmidt. 2021. OpenCLIP. doi:10.5281\/zenodo.5143773 If you use this software please cite it as below..","DOI":"10.5281\/zenodo.5143773"},{"key":"e_1_3_2_1_14_1","volume-title":"Beyond Emotion: A Multi-Modal Dataset for Human Desire Understanding. In North American Chapter of the Association for Computational Linguistics","author":"Jia Ao","year":"2022","unstructured":"Ao Jia, Yu He, Yazhou Zhang, Sagar Uprety, Dawei Song, and Christina Lioma. 2022. Beyond Emotion: A Multi-Modal Dataset for Human Desire Understanding. In North American Chapter of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:250391079"},{"key":"e_1_3_2_1_15_1","volume-title":"Convolutional Neural Networks for Sentence Classification. In Conference on Empirical Methods in Natural Language Processing. https:\/\/api.semanticscholar.org\/CorpusID:9672033","author":"Kim Yoon","year":"2014","unstructured":"Yoon Kim. 2014. Convolutional Neural Networks for Sentence Classification. In Conference on Empirical Methods in Natural Language Processing. https:\/\/api.semanticscholar.org\/CorpusID:9672033"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3244390"},{"key":"e_1_3_2_1_17_1","volume-title":"CLMLF: A Contrastive Learning and Multi-Layer Fusion Method for Multimodal Sentiment Detection. ArXiv","author":"Li Zhen","year":"2022","unstructured":"Zhen Li, Bing Xu, Conghui Zhu, and Tiejun Zhao. 2022. 
CLMLF: A Contrastive Learning and Multi-Layer Fusion Method for Multimodal Sentiment Detection. ArXiv, Vol. abs\/2204.05515 (2022). https:\/\/api.semanticscholar.org\/CorpusID:248119031"},{"key":"e_1_3_2_1_18_1","volume-title":"The desire model: Cross-modal emotion analysis and expression for robots","author":"Lim Angelica","year":"2012","unstructured":"Angelica Lim, Tetsuya Ogata, and Hiroshi G Okuno. 2012. The desire model: Cross-modal emotion analysis and expression for robots. Information Processing Society of Japan, Vol. 5, 4 (2012)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00368"},{"key":"e_1_3_2_1_20_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations. https:\/\/api.semanticscholar.org\/CorpusID:53592270","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter. 2017. Decoupled Weight Decay Regularization. In International Conference on Learning Representations. https:\/\/api.semanticscholar.org\/CorpusID:53592270"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2955637"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.2"},{"key":"e_1_3_2_1_23_1","volume-title":"Thumbs up? Sentiment classification using machine learning techniques. arXiv preprint cs\/0205070","author":"Pang Bo","year":"2002","unstructured":"Bo Pang, Lillian Lee, and Shivakumar Vaithyanathan. 2002. Thumbs up? Sentiment classification using machine learning techniques. 
arXiv preprint cs\/0205070 (2002)."},{"key":"e_1_3_2_1_24_1","volume-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library. ArXiv","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. ArXiv, Vol. abs\/1912.01703 (2019). https:\/\/api.semanticscholar.org\/CorpusID:202786778"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11050-020-09167-7"},{"key":"e_1_3_2_1_26_1","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:231591445","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:231591445"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2016.7477679"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2008.2005605"},{"key":"e_1_3_2_1_29_1","volume-title":"A Meta-Analysis of the Relationship between Curiosity and Creativity. The Journal of Creative Behavior","author":"Schutte Nicola S.","year":"2019","unstructured":"Nicola S. Schutte and John M. Malouff. 2019. A Meta-Analysis of the Relationship between Curiosity and Creativity. The Journal of Creative Behavior (2019). 
https:\/\/api.semanticscholar.org\/CorpusID:199157255"},{"key":"e_1_3_2_1_30_1","volume-title":"Sentiment analysis on the impact of coronavirus in social life using the BERT model. Social Network Analysis and Mining","author":"Singh Mrityunjay","year":"2021","unstructured":"Mrityunjay Singh, Amit Kumar Jakhar, and Shivam Pandey. 2021. Sentiment analysis on the impact of coronavirus in social life using the BERT model. Social Network Analysis and Mining, Vol. 11 (2021). https:\/\/api.semanticscholar.org\/CorpusID:232293517"},{"key":"e_1_3_2_1_31_1","volume-title":"Lexicon-based methods for sentiment analysis. Computational linguistics","author":"Taboada Maite","year":"2011","unstructured":"Maite Taboada, Julian Brooke, Milan Tofiloski, Kimberly Voll, and Manfred Stede. 2011. Lexicon-based methods for sentiment analysis. Computational linguistics, Vol. 37, 2 (2011), 267-307."},{"key":"e_1_3_2_1_32_1","volume-title":"Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks. ArXiv","author":"Tai Kai Sheng","year":"2015","unstructured":"Kai Sheng Tai, Richard Socher, and Christopher D. Manning. 2015. Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks. ArXiv, Vol. abs\/1503.00075 (2015). https:\/\/api.semanticscholar.org\/CorpusID:3033526"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681403"},{"key":"e_1_3_2_1_34_1","volume-title":"Attention-based LSTM for Aspect-level Sentiment Classification. In Conference on Empirical Methods in Natural Language Processing. https:\/\/api.semanticscholar.org\/CorpusID:18993998","author":"Wang Yequan","year":"2016","unstructured":"Yequan Wang, Minlie Huang, Xiaoyan Zhu, and Li Zhao. 2016. Attention-based LSTM for Aspect-level Sentiment Classification. In Conference on Empirical Methods in Natural Language Processing. 
https:\/\/api.semanticscholar.org\/CorpusID:18993998"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISI.2017.8004895"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132847.3133142"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210093"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3035277"},{"key":"e_1_3_2_1_39_1","volume-title":"Multimodal Sentiment Detection Based on Multi-channel Graph Neural Networks. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:236460184","author":"Yang Xiaocui","year":"2021","unstructured":"Xiaocui Yang, Shi Feng, Yifei Zhang, and Daling Wang. 2021. Multimodal Sentiment Detection Based on Multi-channel Graph Neural Networks. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:236460184"},{"key":"e_1_3_2_1_40_1","volume-title":"Hierarchical Attention Networks for Document Classification. In North American Chapter of the Association for Computational Linguistics","author":"Yang Zichao","year":"2016","unstructured":"Zichao Yang, Diyi Yang, Chris Dyer, Xiaodong He, Alex Smola, and Eduard H. Hovy. 2016. Hierarchical Attention Networks for Document Classification. In North American Chapter of the Association for Computational Linguistics. 
https:\/\/api.semanticscholar.org\/CorpusID:6857205"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.11591\/ijece.v9i5.pp3714-3719"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2835776.2835779"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-13149-8"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3593583","article-title":"M3GAT: A Multi-modal, Multi-task Interactive Graph Attention Network for Conversational Sentiment Analysis and Emotion Recognition","volume":"42","author":"Zhang Yazhou","year":"2023","unstructured":"Yazhou Zhang, Ao Jia, Bo Wang, Peng Zhang, Dongming Zhao, Pu Li, Yuexian Hou, Xiaojia Jin, Dawei Song, and Jing Qin. 2023. M3GAT: A Multi-modal, Multi-task Interactive Graph Attention Network for Conversational Sentiment Analysis and Emotion Recognition. ACM Transactions on Information Systems, Vol. 42 (2023), 1 - 32. https:\/\/api.semanticscholar.org\/CorpusID:258788073","journal-title":"ACM Transactions on Information Systems"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3214989"}],"event":{"name":"WWW '26: The ACM Web Conference 2026","location":"Dubai United Arab Emirates","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 
2026"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774904.3792393","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,7,4]],"date-time":"2026-07-04T07:32:07Z","timestamp":1783150327000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774904.3792393"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,12]]},"references-count":45,"alternative-id":["10.1145\/3774904.3792393","10.1145\/3774904"],"URL":"https:\/\/doi.org\/10.1145\/3774904.3792393","relation":{},"subject":[],"published":{"date-parts":[[2026,4,12]]},"assertion":[{"value":"2026-04-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}