{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T22:13:06Z","timestamp":1763417586930,"version":"3.45.0"},"publisher-location":"Singapore","reference-count":39,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819530519"},{"type":"electronic","value":"9789819530526"}],"license":[{"start":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T00:00:00Z","timestamp":1763424000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T00:00:00Z","timestamp":1763424000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3052-6_5","type":"book-chapter","created":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T22:07:29Z","timestamp":1763417249000},"page":"53-68","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multimodal Sentiment Analysis with\u00a0Modality-Robust and\u00a0-Biased Representations and\u00a0Distance-Aware Contrastive Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-8700-2602","authenticated-orcid":false,"given":"Lang","family":"Shen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8247-4562","authenticated-orcid":false,"given":"Qifei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3833-2794","authenticated-orcid":false,"given":"Wenjuan","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9690-1337","authenticated-orcid":false,"given":"Minfeng","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4749-5552","authenticated-orcid":false,"given":"Xiubo","family":"Liang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,18]]},"reference":[{"key":"5_CR1","doi-asserted-by":"publisher","unstructured":"Bagher\u00a0Zadeh, A., Liang, P.P., Poria, S., Cambria, E., Morency, L.P.: Multimodal language analysis in the wild: CMU-MOSEI dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) (2018). https:\/\/doi.org\/10.18653\/v1\/p18-1208","DOI":"10.18653\/v1\/p18-1208"},{"key":"5_CR2","doi-asserted-by":"publisher","unstructured":"Chen, Y., Yuan, J., You, Q., Luo, J.: Twitter sentiment analysis via bi-sense emoji embedding and attention-based LSTM. In: Proceedings of the 26th ACM International Conference on Multimedia, pp. 117\u2013125 (2018). https:\/\/doi.org\/10.1145\/3240508.3240533","DOI":"10.1145\/3240508.3240533"},{"key":"5_CR3","doi-asserted-by":"publisher","unstructured":"Dai, W., Cahyawijaya, S., Liu, Z., Fung, P.: Multimodal end-to-end sparse model for emotion recognition. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (2021). https:\/\/doi.org\/10.18653\/v1\/2021.naacl-main.417","DOI":"10.18653\/v1\/2021.naacl-main.417"},{"key":"5_CR4","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North (2019). https:\/\/doi.org\/10.18653\/v1\/n19-1423","DOI":"10.18653\/v1\/n19-1423"},{"key":"5_CR5","doi-asserted-by":"publisher","unstructured":"Geirhos, R., et al.: Shortcut learning in deep neural networks. Nat. Mach. Intell. 665\u2013673 (2020). https:\/\/doi.org\/10.1038\/s42256-020-00257-z","DOI":"10.1038\/s42256-020-00257-z"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: Misa: modality-invariant and -specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia. Cornell University (2020)","DOI":"10.1145\/3394171.3413678"},{"key":"5_CR7","doi-asserted-by":"publisher","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 1735\u20131780 (1997). https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"5_CR8","doi-asserted-by":"publisher","unstructured":"Hu, J., Fang, Q., Qian, S., Xu, C.: Multi-modal attentive graph pooling model for community question answer matching. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 3505\u20133513 (2020). https:\/\/doi.org\/10.1145\/3394171.3413711","DOI":"10.1145\/3394171.3413711"},{"key":"5_CR9","doi-asserted-by":"publisher","unstructured":"Lei, X., Qian, X., Zhao, G.: Rating prediction based on social sentiment from textual reviews. IEEE Trans. Multimedia 1910\u20131921 (2016). https:\/\/doi.org\/10.1109\/tmm.2016.2575738","DOI":"10.1109\/tmm.2016.2575738"},{"key":"5_CR10","unstructured":"Lin, Z., et al.: Modeling intra-and inter-modal relations: hierarchical graph contrastive learning for multimodal sentiment analysis"},{"key":"5_CR11","doi-asserted-by":"publisher","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., Liang, P.P., Bagher Zadeh, A., Morency, L.P.: Efficient low-rank multimodal fusion with modality-specific factors. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) (2018). https:\/\/doi.org\/10.18653\/v1\/p18-1209","DOI":"10.18653\/v1\/p18-1209"},{"key":"5_CR12","doi-asserted-by":"publisher","unstructured":"Mai, S., Hu, H., Xing, S.: Modality to modality translation: an adversarial representation learning and graph fusion network for multimodal fusion. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 164\u2013172 (2020). https:\/\/doi.org\/10.1609\/aaai.v34i01.5347","DOI":"10.1609\/aaai.v34i01.5347"},{"key":"5_CR13","doi-asserted-by":"publisher","unstructured":"Mai, S., Zeng, Y., Zheng, S., Hu, H.: Hybrid contrastive learning of tri-modal representation for multimodal sentiment analysis. IEEE Trans. Affect. Comput. 1 (2022). https:\/\/doi.org\/10.1109\/taffc.2022.3172360","DOI":"10.1109\/taffc.2022.3172360"},{"key":"5_CR14","unstructured":"Nam, J., Cha, H., Ahn, S., Lee, J.H., Shin, J.: Learning from failure: de-biasing classifier from biased classifier. In: Neural Information Processing Systems (2020)"},{"key":"5_CR15","unstructured":"Oord, A., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv:1807.03748 (2018)"},{"key":"5_CR16","doi-asserted-by":"publisher","unstructured":"Pang, B., Lee, L.: A sentimental education. In: Proceedings of the 42nd Annual Meeting on Association for Computational Linguistics - ACL 2004 (2004). https:\/\/doi.org\/10.3115\/1218955.1218990","DOI":"10.3115\/1218955.1218990"},{"key":"5_CR17","doi-asserted-by":"crossref","unstructured":"Qi, J., Tang, K., Sun, Q., Hua, X.S., Zhang, H.: Class is invariant to context and vice versa: on learning invariance for out-of-distribution generalization (2022)","DOI":"10.1007\/978-3-031-19806-9_6"},{"key":"5_CR18","doi-asserted-by":"publisher","unstructured":"Rahman, W., et al.: Integrating multimodal information in large pretrained transformers. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.214","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"5_CR19","doi-asserted-by":"publisher","unstructured":"Soleymani, M., Garcia, D., Jou, B., Schuller, B., Chang, S.F., Pantic, M.: A survey of multimodal sentiment analysis. Image Vis. Comput 3\u201314 (2017). https:\/\/doi.org\/10.1016\/j.imavis.2017.08.003","DOI":"10.1016\/j.imavis.2017.08.003"},{"key":"5_CR20","unstructured":"Sun, T., Jing, L., Wei, Y., Song, X., Cheng, Z., Nie, L.: Dual consistency-enhanced semi-supervised sentiment analysis towards COVID-19 tweets"},{"issue":"2","key":"5_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3475872","volume":"18","author":"T Sun","year":"2022","unstructured":"Sun, T., Wang, C., Song, X., Feng, F., Nie, L.: Response generation by jointly modeling personalized linguistic styles and emotions. ACM Trans. Multimed. Comput. Commun. Appl. 18(2), 1\u201320 (2022). https:\/\/doi.org\/10.1145\/3475872","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"5_CR22","doi-asserted-by":"publisher","unstructured":"Sun, T., Wang, W., Jing, L., Cui, Y., Song, X., Nie, L.: Counterfactual reasoning for out-of-distribution multimodal sentiment analysis. In: Proceedings of the 30th ACM International Conference on Multimedia (2022). https:\/\/doi.org\/10.1145\/3503161.3548211","DOI":"10.1145\/3503161.3548211"},{"key":"5_CR23","doi-asserted-by":"publisher","unstructured":"Sun, Z., Sarma, P., Sethares, W., Liang, Y.: Learning relationships between text, audio, and video via deep canonical correlation for multimodal language analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 8992\u20138999 (2020). https:\/\/doi.org\/10.1609\/aaai.v34i05.6431","DOI":"10.1609\/aaai.v34i05.6431"},{"key":"5_CR24","doi-asserted-by":"publisher","unstructured":"Tsai, Y.H.H., Bai, S., Liang, P.P., Kolter, J.Z., Morency, L.P., Salakhutdinov, R.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (2019). https:\/\/doi.org\/10.18653\/v1\/p19-1656","DOI":"10.18653\/v1\/p19-1656"},{"key":"5_CR25","unstructured":"Tsai, Y.H.H., Liang, P.P., Zadeh, A., Morency, L.P., Salakhutdinov, R.: Learning factorized multimodal representations. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=rygqqsA9KX"},{"key":"5_CR26","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Neural Information Processing Systems (2017)"},{"key":"5_CR27","unstructured":"Wang, D., Liu, S., Wang, Q., Tian, Y., He, L., Gao, X.: Cross-modal enhancement network for multimodal sentiment analysis"},{"key":"5_CR28","doi-asserted-by":"publisher","unstructured":"Wang, Y., Shen, Y., Liu, Z., Liang, P.P., Zadeh, A., Morency, L.P.: Words can shift: dynamically adjusting word representations using nonverbal behaviors. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 7216\u20137223 (2019). https:\/\/doi.org\/10.1609\/aaai.v33i01.33017216","DOI":"10.1609\/aaai.v33i01.33017216"},{"key":"5_CR29","unstructured":"Yang, D., Huang, S.: Disentangled representation learning for multimodal emotion recognition"},{"key":"5_CR30","unstructured":"Yang, J., Yu, Y., Niu, D., Guo, W., Xu, Y.: Confede: contrastive feature decomposition for multimodal sentiment analysis"},{"key":"5_CR31","doi-asserted-by":"publisher","unstructured":"Yang, K., Xu, H., Gao, K.: CM-Bert. In: Proceedings of the 28th ACM International Conference on Multimedia (2020). https:\/\/doi.org\/10.1145\/3394171.3413690","DOI":"10.1145\/3394171.3413690"},{"key":"5_CR32","unstructured":"Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R., Le, Q.: XLNet: generalized autoregressive pretraining for language understanding"},{"key":"5_CR33","doi-asserted-by":"publisher","unstructured":"Yu, W., Xu, H., Yuan, Z., Wu, J.: Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 10790\u201310797 (2022). https:\/\/doi.org\/10.1609\/aaai.v35i12.17289","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"5_CR34","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Chen, M., Poria, S., Cambria, E., Morency, L.P.: Tensor fusion network for multimodal sentiment analysis. arXiv:1707.07250 (2017)","DOI":"10.18653\/v1\/D17-1115"},{"key":"5_CR35","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Liang, P.P., Mazumder, N., Poria, S., Cambria, E., Morency, L.P.: Memory fusion network for multi-view sequential learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (2022). https:\/\/doi.org\/10.1609\/aaai.v32i1.12021","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"5_CR36","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Liang, P.P., Poria, S., Vij, P., Cambria, E., Morency, L.P.: Multi-attention recurrent network for human communication comprehension. In: Proceedings of the AAAI Conference on Artificial Intelligence (2022). https:\/\/doi.org\/10.1609\/aaai.v32i1.12024","DOI":"10.1609\/aaai.v32i1.12024"},{"key":"5_CR37","unstructured":"Zadeh, A., Zellers, R., Pincus, E., Morency, L.P.: Mosi: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. arXiv:1606.06259 (2016)"},{"key":"5_CR38","unstructured":"Zhang, Z., Sabuncu, M.: Generalized cross entropy loss for training deep neural networks with noisy labels. In: Advances in Neural Information Processing Systems (2018)"},{"key":"5_CR39","doi-asserted-by":"publisher","unstructured":"Zolfaghari, M., Zhu, Y., Gehler, P., Brox, T.: CrossCLR: cross-modal contrastive learning for multi-modal video representations. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV) (2021). https:\/\/doi.org\/10.1109\/iccv48922.2021.00148","DOI":"10.1109\/iccv48922.2021.00148"}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3052-6_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T22:07:32Z","timestamp":1763417252000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3052-6_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,18]]},"ISBN":["9789819530519","9789819530526"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3052-6_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,11,18]]},"assertion":[{"value":"18 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KSEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Knowledge Science, Engineering and Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Macao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ksem2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ksem2025.scimeeting.cn\/en\/web\/index\/27434","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}