{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:49:09Z","timestamp":1755794949200,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62176043, No.62072077, No.U22A2097"],"award-info":[{"award-number":["No.62176043, No.62072077, No.U22A2097"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737101","type":"proceedings-article","created":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T13:30:13Z","timestamp":1754055013000},"page":"1241-1252","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["REDEEMing Modality Information Loss: Retrieval-Guided Conditional Generation for Severely Modality Missing Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0876-0497","authenticated-orcid":false,"given":"Jian","family":"Lang","sequence":"first","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4977-1657","authenticated-orcid":false,"given":"Rongpei","family":"Hong","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0097-3617","authenticated-orcid":false,"given":"Zhangtao","family":"Cheng","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8163-3146","authenticated-orcid":false,"given":"Ting","family":"Zhong","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8699-8355","authenticated-orcid":false,"given":"Yong","family":"Wang","sequence":"additional","affiliation":[{"name":"Aiwen Tech, Zhengzhou, Henan, China and Hong Kong University of Science and Technology, Clear Water Bay, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8038-8150","authenticated-orcid":false,"given":"Fan","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China and Key Laboratory of Intelligent Digital Media Technology of Sichuan Province, Chengdu, Sichuan, China"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1095"},{"key":"e_1_3_2_2_2_1","volume-title":"Manuel Montes-y G\u00f3mez, and Fabio A Gonz\u00e1lez","author":"Arevalo John","year":"2017","unstructured":"John Arevalo, Thamar Solorio, Manuel Montes-y G\u00f3mez, and Fabio A Gonz\u00e1lez. 2017. Gated multimodal units for information fusion. arXiv preprint arXiv:1702.01992(2017)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219963"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671821"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.73"},{"key":"e_1_3_2_2_6_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Eigen David","year":"2013","unstructured":"David Eigen, Marc'Aurelio Ranzato, and Ilya Sutskever. 2013. Learning factored representations in a deep mixture of experts. 
In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_7_1","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. Journal of Machine Learning Research, Vol. 23, 120 (2022), 1-39.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-57808-3_12"},{"volume-title":"Proceedings of the ACM International Conference on Multimedia (MM).","author":"Gao Zixian","key":"e_1_3_2_2_9_1","unstructured":"Zixian Gao, Disen Hu, Xun Jiang, Huimin Lu, Heng Tao Shen, and Xing Xu. [n.d.]. Enhanced Experts with Uncertainty-Aware Routing for Multimodal Sentiment Analysis. In Proceedings of the ACM International Conference on Multimedia (MM)."},{"volume-title":"Retrieval-Enhanced Contrastive Vision-Text Models. In The Twelfth International Conference on Learning Representations.","author":"Iscen Ahmet","key":"e_1_3_2_2_10_1","unstructured":"Ahmet Iscen, Mathilde Caron, Alireza Fathi, and Cordelia Schmid. [n.d.]. Retrieval-Enhanced Contrastive Vision-Text Models. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"e_1_3_2_2_12_1","volume-title":"Towards Robust Multimodal Prompting with Missing Modalities. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 8070-8074","author":"Jang Jaehyuk","year":"2024","unstructured":"Jaehyuk Jang, Yooseung Wang, and Changick Kim. 2024. Towards Robust Multimodal Prompting with Missing Modalities. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 
IEEE, 8070-8074."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671644"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3672069"},{"key":"e_1_3_2_2_15_1","first-page":"2611","article-title":"The hateful memes challenge: Detecting hate speech in multimodal memes","volume":"33","author":"Kiela Douwe","year":"2020","unstructured":"Douwe Kiela, Hamed Firooz, Aravind Mohan, Vedanuj Goswami, Amanpreet Singh, Pratik Ringshia, and Davide Testuggine. 2020. The hateful memes challenge: Detecting hate speech in multimodal memes. Advances in Neural Information Processing Systems (Neurips), Vol. 33 (2020), 2611-2624.","journal-title":"Advances in Neural Information Processing Systems (Neurips)"},{"key":"e_1_3_2_2_16_1","volume-title":"International Conference on Machine Learning (ICML). PMLR, 5583-5594","author":"Kim Wonjae","year":"2021","unstructured":"Wonjae Kim, Bokyung Son, and Ildoo Kim. 2021. Vilt: Vision-and-language transformer without convolution or region supervision. In International Conference on Machine Learning (ICML). PMLR, 5583-5594."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i17.33984"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01435"},{"key":"e_1_3_2_2_19_1","volume-title":"GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In International Conference on Learning Representations (ICLR).","author":"Lepikhin Dmitry","year":"2020","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2020. GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_20_1","volume-title":"International Conference on Machine Learning (ICML). 
PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International Conference on Machine Learning (ICML). PMLR, 19730-19742."},{"key":"e_1_3_2_2_21_1","volume-title":"Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 12458-12468","author":"Li Mingcheng","year":"2024","unstructured":"Mingcheng Li, Dingkang Yang, Xiao Zhao, Shuaibing Wang, Yan Wang, Kun Yang, Mingyang Sun, Dongliang Kou, Ziyun Qian, and Lihua Zhang. 2024b. Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 12458-12468."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671681"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"e_1_3_2_2_24_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Loshchilov Ilya","year":"2018","unstructured":"Ilya Loshchilov and Frank Hutter. 2018. Decoupled weight decay regularization. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01764"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16330"},{"key":"e_1_3_2_2_27_1","first-page":"9564","article-title":"Multimodal contrastive learning with limoe: the language-image mixture of experts","volume":"35","author":"Mustafa Basil","year":"2022","unstructured":"Basil Mustafa, Carlos Riquelme, Joan Puigcerver, Rodolphe Jenatton, and Neil Houlsby. 2022. Multimodal contrastive learning with limoe: the language-image mixture of experts. 
Advances in Neural Information Processing Systems (Neurips), Vol. 35 (2022), 9564-9576.","journal-title":"Advances in Neural Information Processing Systems (Neurips)"},{"key":"e_1_3_2_2_28_1","volume-title":"Pre-trained Vision and Language Transformers Are Few-Shot Incremental Learners. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 23881-23890","author":"Park Keon-Hee","year":"2024","unstructured":"Keon-Hee Park, Kyungwoo Song, and Gyeong-Moon Park. 2024. Pre-trained Vision and Language Transformers Are Few-Shot Incremental Learners. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 23881-23890."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3395035.3425202"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016892"},{"key":"e_1_3_2_2_31_1","volume-title":"International Conference on Machine Learning (ICML). PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning (ICML). PMLR, 8748-8763."},{"key":"e_1_3_2_2_32_1","volume-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In International Conference on Learning Representations (ICLR).","author":"Shazeer Noam","year":"2016","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2016. Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. 
In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.758"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599839"},{"key":"e_1_3_2_2_35_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of Machine Learning Research, Vol. 9, 11 (2008).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01524"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i14.29474"},{"key":"e_1_3_2_2_38_1","volume-title":"IEEE International Conference on Multimedia & Expo Workshops (ICME). IEEE, 1-6.","author":"Wang Xin","year":"2015","unstructured":"Xin Wang, Devinder Kumar, Nicolas Thome, Matthieu Cord, and Frederic Precioso. 2015. Recipe recognition with large multimodal food dataset. In IEEE International Conference on Multimedia & Expo Workshops (ICME). IEEE, 1-6."},{"key":"e_1_3_2_2_39_1","first-page":"17117","article-title":"Incomplete multimodality-diffused emotion recognition","volume":"36","author":"Wang Yuanzhi","year":"2023","unstructured":"Yuanzhi Wang, Yong Li, and Zhen Cui. 2023b. Incomplete multimodality-diffused emotion recognition. Advances in Neural Information Processing Systems, Vol. 36 (2023), 17117-17128.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_40_1","unstructured":"A Waswani N Shazeer N Parmar J Uszkoreit L Jones A Gomez L Kaiser and I Polosukhin. 2017. Attention is all you need. 
In Advances in Neural Information Processing Systems (Neurips)."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25378"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681683"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i15.29578"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.558"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475585"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01302"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00366"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3672024"},{"key":"e_1_3_2_2_49_1","volume-title":"Is Sora a World Simulator? A Comprehensive Survey on General World Models and Beyond. arXiv","author":"Zhu Zheng","year":"2024","unstructured":"Zheng Zhu, Xiaofeng Wang, Wangbo Zhao, Chen Min, Nianchen Deng, Min Dou, Yuqi Wang, Botian Shi, Kai Wang, Chi Zhang, Yang You, Zhaoxiang Zhang, Dawei Zhao, Liang Xiao, Jian Zhao, Jiwen Lu, and Guan Huang. 2024b. Is Sora a World Simulator? A Comprehensive Survey on General World Models and Beyond. arXiv, Vol. 
abs\/2405.03520 (2024)."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095836"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737101","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T14:39:17Z","timestamp":1755355157000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737101"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":50,"alternative-id":["10.1145\/3711896.3737101","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737101","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}