{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T05:54:33Z","timestamp":1763790873997,"version":"3.45.0"},"publisher-location":"Singapore","reference-count":34,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819533459","type":"print"},{"value":"9789819533466","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T00:00:00Z","timestamp":1763856000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T00:00:00Z","timestamp":1763856000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3346-6_2","type":"book-chapter","created":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T05:50:19Z","timestamp":1763790619000},"page":"16-33","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MRF: A Modality-Resilient Framework for\u00a0Handling Missing Modalities in\u00a0Multimodal Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4611-7928","authenticated-orcid":false,"given":"Pengfei","family":"Du","sequence":"first","affiliation":[]},{"given":"Yongjun","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Xu","family":"Chang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3543-6272","authenticated-orcid":false,"given":"Ruifan","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,23]]},"reference":[{"key":"2_CR1","unstructured":"Alayrac, J.B., et al.: Flamingo: a visual language model for few-shot learning. arXiv preprint arXiv:2204.14198 (2022)"},{"key":"2_CR2","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"2_CR3","first-page":"17283","volume":"33","author":"B Bischke","year":"2021","unstructured":"Bischke, B., Helber, P., Dengel, A., Borth, D.: GAN-based modality imputation for robust multimodal learning. Neural Comput. Appl. 33, 17283\u201317297 (2021)","journal-title":"Neural Comput. Appl."},{"key":"2_CR4","doi-asserted-by":"publisher","unstructured":"Busso, C., et al.: IEMOCAP: interactive emotional dyadic motion capture database. Lang. Resour. Eval., 335\u2013359 (2008). https:\/\/doi.org\/10.1007\/s10579-008-9076-6. http:\/\/dx.doi.org\/10.1007\/s10579-008-9076-6","DOI":"10.1007\/s10579-008-9076-6"},{"key":"2_CR5","first-page":"83","volume":"158","author":"Y Chen","year":"2022","unstructured":"Chen, Y., Zhao, H., Sun, G.: Ensemble learning strategies for multimodal fusion with missing modalities. Pattern Recogn. Lett. 158, 83\u201394 (2022)","journal-title":"Pattern Recogn. Lett."},{"key":"2_CR6","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press (2016)"},{"key":"2_CR7","unstructured":"Hafner, D., Ruder, S., Kumar, S., Sun, K.: Optical flow distillation: a knowledge transfer approach for multimodal learning. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 1245\u20131257 (2021)"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Hao, X., et al.: MixGen: a new multi-modal data augmentation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 379\u2013389 (2023)","DOI":"10.1109\/WACVW58289.2023.00042"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Hazarika, D., Zimmermann, R., Poria, S.: MISA: modality-invariant and -specific representations for multimodal sentiment analysis. In: Proceedings of the 28th ACM International Conference on Multimedia (MM), pp. 1122\u20131131. Association for Computing Machinery (2020)","DOI":"10.1145\/3394171.3413678"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Hsu, W.N., Bolte, B., Tsai, Y.H.H., Lakhotia, K., Salakhutdinov, R., Mohamed, A.: HuBERT: self-supervised speech representation learning by masked prediction of hidden units. In: International Conference on Machine Learning (ICML), pp. 12950\u201312960 (2021)","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"2_CR11","unstructured":"Kenton, J.D.M.W.C., Toutanova, L.K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, Minneapolis, Minnesota, vol.\u00a01 (2019)"},{"key":"2_CR12","unstructured":"Li, X., Zhao, Y., Liu, X., Meng, H.: Autoencoder-based modality imputation for multimodal learning. In: Proceedings of the International Conference on Learning Representations (ICLR) (2022)"},{"key":"2_CR13","unstructured":"Lian, S., Wang, J., He, J., Zhang, Q.: Graph completion networks for multimodal learning with missing modalities. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), pp. 8961\u20138969 (2022)"},{"issue":"7","key":"2_CR14","first-page":"8419","volume":"45","author":"Z Lian","year":"2023","unstructured":"Lian, Z., Chen, L., Sun, L., Liu, B., Tao, J.: GCNet: graph completion network for incomplete multimodal learning in conversation. IEEE Trans. Pattern Anal. Mach. Intell. 45(7), 8419\u20138432 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Little, R.J., Rubin, D.B.: Statistical Analysis with Missing Data, vol.\u00a0793. Wiley (2019)","DOI":"10.1002\/9781119482260"},{"key":"2_CR16","unstructured":"Liu, F., Yang, X., Zhang, H., Wang, C.: KDNet: knowledge distillation for multimodal learning with incomplete data. In: Proceedings of the 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5289\u20135298 (2019)"},{"key":"2_CR17","unstructured":"Liu, X., Li, R., Ye, S., Zhang, G., Wang, X.: Multimodal aspect-based sentiment analysis under conditional relation. In: Proceedings of the 31st International Conference on Computational Linguistics, pp. 313\u2013323 (2025)"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Liu, Z., Shen, Y., Lakshminarasimhan, V.B., Liang, P.P., Zadeh, A., Morency, L.P.: Efficient low-rank multimodal fusion with modality-specific factors. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (ACL), vol.\u00a01, pp. 2247\u20132256. Association for Computational Linguistics (2018)","DOI":"10.18653\/v1\/P18-1209"},{"key":"2_CR19","doi-asserted-by":"publisher","first-page":"1692","DOI":"10.1109\/TPAMI.2015.2461544","volume":"38","author":"N Neverova","year":"2016","unstructured":"Neverova, N., Wolf, C., Lacey, G., Taylor, G.W.: ModDrop: adaptive multi-modal gesture recognition. IEEE Trans. Pattern Anal. Mach. Intell. 38, 1692\u20131704 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2_CR20","unstructured":"Ngiam, J., Khosla, A., Kim, M., Nam, J., Lee, H., Ng, A.Y.: Multimodal deep learning. In: Proceedings of the 28th International Conference on Machine Learning (ICML 2011), pp. 689\u2013696 (2011)"},{"key":"2_CR21","unstructured":"Qian, Y., Wu, X., Zhou, Q., Xie, Y., Chen, M.: Masked modality attention for multimodal learning with missing data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5901\u20135910 (2023)"},{"key":"2_CR22","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"2_CR23","unstructured":"Shen, H., Zhao, R., Guo, X., Chen, J.: Domain adaptation via intermediate feature distillation for multimodal learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7342\u20137353 (2023)"},{"key":"2_CR24","unstructured":"Shen, Y., Sun, Z., Yu, J., Liu, Q., Wang, F.: HuggingGPT: adaptive large language model coordination for multimodal tasks. arXiv preprint arXiv:2303.17580 (2023)"},{"key":"2_CR25","unstructured":"Shuster, K., Humeau, S., Hu, H., Bordes, A., Weston, J.: Multimodal open-domain dialogue. arXiv preprint arXiv:2010.01082 (2021)"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Tran, D., Hsu, W.C., Cheng, J., Sluijter, R., Prasad, S.: Missing modalities imputation via cascaded residual autoencoder. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 73\u201382 (2017)","DOI":"10.1109\/CVPR.2017.528"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., Zadeh, A., Morency, L.P., Salakhutdinov, R.: Multimodal routing: improving local and global interpretability of multimodal language analysis. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (ACL), pp. 5403\u20135415 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.143"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"2_CR29","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.patrec.2023.10.008","volume":"163","author":"T Wang","year":"2023","unstructured":"Wang, T., Jiang, Z., Zhang, J., Ma, J., Sun, H.: Multimodal learning with diffusion models for missing modality imputation. Pattern Recogn. Lett. 163, 21\u201330 (2023)","journal-title":"Pattern Recogn. Lett."},{"key":"2_CR30","unstructured":"Wu, H., Zhang, Q., Liu, J., Huang, X., Yu, P.S.: Deep multimodal learning with missing modality: a survey. arXiv preprint arXiv:2401.12345 (2024)"},{"key":"2_CR31","doi-asserted-by":"publisher","unstructured":"Yu, W., et al.: CH-SIMS: a Chinese multimodal sentiment analysis dataset with fine-grained annotation of modality. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.343. http:\/\/dx.doi.org\/10.18653\/v1\/2020.acl-main.343","DOI":"10.18653\/v1\/2020.acl-main.343"},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Chen, M., Poria, S., Cambria, E., Morency, L.P.: Tensor fusion network for multimodal sentiment analysis. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1103\u20131114 (2017)","DOI":"10.18653\/v1\/D17-1115"},{"key":"2_CR33","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Liang, P.P., Poria, S., Cambria, E., Morency, L.P.: Memory fusion network for multimodal sentiment analysis. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1234\u20131244. Association for Computational Linguistics (2018)","DOI":"10.18653\/v1\/D17-1115"},{"key":"2_CR34","doi-asserted-by":"publisher","unstructured":"Zadeh, A., Zellers, R., Pincus, E., Morency, L.P.: Multimodal sentiment intensity analysis in videos: facial gestures and verbal messages. IEEE Intell. Syst., 82\u201388 (2016). https:\/\/doi.org\/10.1109\/mis.2016.94. http:\/\/dx.doi.org\/10.1109\/mis.2016.94","DOI":"10.1109\/mis.2016.94"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3346-6_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T05:50:34Z","timestamp":1763790634000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3346-6_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,23]]},"ISBN":["9789819533459","9789819533466"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3346-6_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,23]]},"assertion":[{"value":"23 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2025\/index.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}