{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T12:48:16Z","timestamp":1763729296342,"version":"3.45.0"},"publisher-location":"Singapore","reference-count":37,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819543663","type":"print"},{"value":"9789819543670","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T00:00:00Z","timestamp":1763769600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T00:00:00Z","timestamp":1763769600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-4367-0_6","type":"book-chapter","created":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T12:37:23Z","timestamp":1763728643000},"page":"76-90","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DMBL: Dual-Stage Multimodal Balanced Learning for\u00a0Multimodal Sentiment Analysis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-2551-4425","authenticated-orcid":false,"given":"YiYang","family":"Tang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9930-2903","authenticated-orcid":false,"given":"Qian","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5729-0410","authenticated-orcid":false,"given":"NanJie","family":"Zheng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3410-2155","authenticated-orcid":false,"given":"Ning","family":"Luo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,22]]},"reference":[{"key":"6_CR1","unstructured":"Alabdulmohsin, I., Maennel, H., Keysers, D.: The impact of reinitialization on generalization in convolutional neural networks. arXiv preprint arXiv:2109.00267 (2021)"},{"key":"6_CR2","first-page":"3884","volume":"33","author":"J Ash","year":"2020","unstructured":"Ash, J., Adams, R.P.: On warm-starting neural network training. Adv. Neural. Inf. Process. Syst. 33, 3884\u20133894 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Baltru\u0161aitis, T., Robinson, P., Morency, L.P.: Openface: an open source facial behavior analysis toolkit. In: 2016 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1\u201310. IEEE (2016)","DOI":"10.1109\/WACV.2016.7477553"},{"key":"6_CR4","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Feng, X., Lin, Y., He, L., Li, Y., Chang, L., Zhou, Y.: Knowledge-guided dynamic modality attention fusion framework for multimodal sentiment analysis. arXiv preprint arXiv:2410.04491 (2024)","DOI":"10.18653\/v1\/2024.findings-emnlp.865"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Gandhi, A., Adhvaryu, K., Khanduja, V.: Multimodal sentiment analysis: review, application domains and future directions. 
In: 2021 IEEE Pune Section International Conference (PuneCon), pp.\u00a01\u20135. IEEE (2021)","DOI":"10.1109\/PuneCon52575.2021.9686504"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Han, W., Chen, H., Poria, S.: Improving multimodal fusion with hierarchical mutual information maximization for multimodal sentiment analysis. arXiv preprint arXiv:2109.00412 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.723"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Hazarika, D., Li, Y., Cheng, B., Zhao, S., Zimmermann, R., Poria, S.: Analyzing modality robustness in multimodal sentiment analysis. arXiv preprint arXiv:2205.15465 (2022)","DOI":"10.18653\/v1\/2022.naacl-main.50"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: Misa: modality-invariant and-specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1122\u20131131 (2020)","DOI":"10.1145\/3394171.3413678"},{"key":"6_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111346","volume":"285","author":"J Huang","year":"2024","unstructured":"Huang, J., Zhou, J., Tang, Z., Lin, J., Chen, C.Y.C.: TMBL: transformer-based multimodal binding learning model for multimodal sentiment analysis. Knowl.-Based Syst. 285, 111346 (2024)","journal-title":"Knowl.-Based Syst."},{"key":"6_CR11","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1016\/j.inffus.2019.06.019","volume":"53","author":"Y Jiang","year":"2020","unstructured":"Jiang, Y., Li, W., Hossain, M.S., Chen, M., Alelaiwi, A., Al-Hammadi, M.: A snapshot research and implementation of multimodal information fusion for data-driven emotion recognition. Inf. Fusion 53, 209\u2013221 (2020)","journal-title":"Inf. Fusion"},{"key":"6_CR12","unstructured":"Kenton, J.D.M.W.C., Toutanova, L.K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, Minneapolis, Minnesota, vol.\u00a01, p.\u00a02 (2019)"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Kumar, A., Vepa, J.: Gated mechanism for attention based multi modal sentiment analysis. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4477\u20134481. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053012"},{"key":"6_CR14","unstructured":"Li, Z., et al.: Amoa: global acoustic feature enhanced modal-order-aware network for multimodal sentiment analysis. In: Proceedings of the 29th International Conference on Computational Linguistics, pp. 7136\u20137146 (2022)"},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., Liang, P.P., Zadeh, A., Morency, L.P.: Efficient low-rank multimodal fusion with modality-specific factors. arXiv preprint arXiv:1806.00064 (2018)","DOI":"10.18653\/v1\/P18-1209"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Lv, F., Chen, X., Huang, Y., Duan, L., Lin, G.: Progressive modality reinforcement for human multimodal emotion recognition from unaligned multimodal sequences. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2554\u20132562 (2021)","DOI":"10.1109\/CVPR46437.2021.00258"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Mao, H., Yuan, Z., Xu, H., Yu, W., Liu, Y., Gao, K.: M-SENA: an integrated platform for multimodal sentiment analysis. 
arXiv preprint arXiv:2203.12441 (2022)","DOI":"10.18653\/v1\/2022.acl-demo.20"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"McFee, B., et al.: librosa: audio and music signal analysis in python. In: SciPy, pp. 18\u201324 (2015)","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Rahman, W., et al.: Integrating multimodal information in large pretrained transformers. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, vol.\u00a02020, p.\u00a02359. NIH Public Access (2020)","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"6_CR20","unstructured":"Sehwag, V., Chiang, M., Mittal, P.: On separability of self-supervised representations. In: ICML Workshop on Uncertainty and Robustness in Deep Learning (UDL), vol. 3 (2020)"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Sun, H., Wang, H., Liu, J., Chen, Y.W., Lin, L.: Cubemlp: an MLP-based model for multimodal sentiment analysis and depression estimation. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 3722\u20133729 (2022)","DOI":"10.1145\/3503161.3548025"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., Bai, S., Liang, P.P., Kolter, J.Z., Morency, L.P., Salakhutdinov, R.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, vol.\u00a02019, p.\u00a06558. NIH Public Access (2019)","DOI":"10.18653\/v1\/P19-1656"},{"key":"6_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109259","volume":"136","author":"D Wang","year":"2023","unstructured":"Wang, D., Guo, X., Tian, Y., Liu, J., He, L., Luo, X.: TETFN: a text enhanced transformer fusion network for multimodal sentiment analysis. Pattern Recogn. 136, 109259 (2023)","journal-title":"Pattern Recogn."},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Wang, D., Liu, S., Wang, Q., Tian, Y., He, L., Gao, X.: Cross-modal enhancement network for multimodal sentiment analysis. IEEE Trans. Multimedia (2022)","DOI":"10.1109\/TMM.2022.3183830"},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Wei, Y., Li, S., Feng, R., Hu, D.: Diagnosing and re-learning for balanced multimodal learning. In: European Conference on Computer Vision, pp. 71\u201386. Springer (2024)","DOI":"10.1007\/978-3-031-73039-9_5"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Wong, K.C.: A short survey on data clustering algorithms. In: 2015 Second International Conference on Soft Computing and Machine Intelligence (ISCMI), pp. 64\u201368. IEEE (2015)","DOI":"10.1109\/ISCMI.2015.10"},{"key":"6_CR27","unstructured":"Wu, N., Jastrzebski, S., Cho, K., Geras, K.J.: Characterizing and overcoming the greedy nature of learning in multi-modal deep neural networks, pp. 24043\u201324055 (2022)"},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"Yang, D., Huang, S., Kuang, H., Du, Y., Zhang, L.: Disentangled representation learning for multimodal emotion recognition. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 1642\u20131651 (2022)","DOI":"10.1145\/3503161.3547754"},{"key":"6_CR29","unstructured":"Yang, Z., Wei, Y., Liang, C., Hu, D.: Quantifying and enhancing multi-modal robustness with modality preference. arXiv preprint arXiv:2402.06244 (2024)"},{"key":"6_CR30","doi-asserted-by":"crossref","unstructured":"Ying, X.: An overview of overfitting and its solutions. 
In: Journal of Physics: Conference Series, vol.\u00a01168, p. 022022. IOP Publishing (2019)","DOI":"10.1088\/1742-6596\/1168\/2\/022022"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Yu, W., Xu, H., Yuan, Z., Wu, J.: Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 10790\u201310797 (2021)","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Chen, M., Poria, S., Cambria, E., Morency, L.P.: Tensor fusion network for multimodal sentiment analysis. arXiv preprint arXiv:1707.07250 (2017)","DOI":"10.18653\/v1\/D17-1115"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Liang, P.P., Mazumder, N., Poria, S., Cambria, E., Morency, L.P.: Memory fusion network for multi-view sequential learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"6_CR34","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Liang, P.P., Poria, S., Vij, P., Cambria, E., Morency, L.P.: Multi-attention recurrent network for human communication comprehension. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12024"},{"issue":"6","key":"6_CR35","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MIS.2016.94","volume":"31","author":"A Zadeh","year":"2016","unstructured":"Zadeh, A., Zellers, R., Pincus, E., Morency, L.P.: Multimodal sentiment intensity analysis in videos: facial gestures and verbal messages. IEEE Intell. Syst. 31(6), 82\u201388 (2016)","journal-title":"IEEE Intell. Syst."},{"key":"6_CR36","doi-asserted-by":"crossref","unstructured":"Zadeh, A.B., Liang, P.P., Poria, S., Cambria, E., Morency, L.P.: Multimodal language analysis in the wild: CMU-MOSEI dataset and interpretable dynamic fusion graph. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2236\u20132246 (2018)","DOI":"10.18653\/v1\/P18-1208"},{"key":"6_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, H., Wang, Y., Yin, G., Liu, K., Liu, Y., Yu, T.: Learning language-guided adaptive hyper-modality representation for multimodal sentiment analysis. 
arXiv preprint arXiv:2310.05804 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.49"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-4367-0_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T12:37:38Z","timestamp":1763728658000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-4367-0_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,22]]},"ISBN":["9789819543663","9789819543670"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-4367-0_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,22]]},"assertion":[{"value":"22 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Okinawa","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"32","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iconip2025.apnns.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}