{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:10:26Z","timestamp":1755825026676,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3733498","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:31:04Z","timestamp":1750876264000},"page":"173-182","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["<i>\n              <b>\u03b3<\/b>\n            <\/i>\n            -CRD:Gamma-Cooperative Retrieval Diffusion Model for Robust Incomplete Multimodal Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8944-6759","authenticated-orcid":false,"given":"Ruiting","family":"Dai","sequence":"first","affiliation":[{"name":"University of Electronic Science and Technology of China, ChengDu, Sichuan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2612-0690","authenticated-orcid":false,"given":"Wenwei","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, ChengDu, Sichuan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8474-9719","authenticated-orcid":false,"given":"Zheyu","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, ChengDu, Sichuan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2794-797X","authenticated-orcid":false,"given":"Haoran","family":"Meng","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, ChengDu, Sichuan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6971-5649","authenticated-orcid":false,"given":"Zhengdao","family":"Yuan","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, ChengDu, Sichuan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6331-4594","authenticated-orcid":false,"given":"Yandong","family":"Yan","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4742-4456","authenticated-orcid":false,"given":"Lisi","family":"Mo","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, ChengDu, Sichuan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.31117\/neuroscirn.v3i5.80"},{"key":"e_1_3_2_1_2_1","volume-title":"International conference on machine learning. PMLR, 1247-- 1255","author":"Andrew Galen","year":"2013","unstructured":"Galen Andrew, Raman Arora, Jeff Bilmes, and Karen Livescu. 2013. Deep canonical correlation analysis. In International conference on machine learning. PMLR, 1247-- 1255."},{"key":"e_1_3_2_1_3_1","volume-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Yuhao Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33 (2020), 12449--12460."},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Machine Learning. PMLR, 1692--1717","author":"Bao Fan","year":"2023","unstructured":"Fan Bao, Shen Nie, Kaiwen Xue, Chongxuan Li, Shi Pu, Yaole Wang, Gang Yue, Yue Cao, Hang Su, and Jun Zhu. 2023. One transformer fits all distributions in multi-modal diffusion at scale. In International Conference on Machine Learning. PMLR, 1692--1717."},{"key":"e_1_3_2_1_5_1","volume-title":"Mobyen Uddin Ahmed, and Shahina Begum","author":"Barua Arnab","year":"2023","unstructured":"Arnab Barua, Mobyen Uddin Ahmed, and Shahina Begum. 2023. A systematic literature review on multimodal machine learning: Applications, challenges, gaps and future directions. Ieee access 11 (2023), 14804--14831."},{"key":"e_1_3_2_1_6_1","volume-title":"Mohammad Dahman Alshehri, Lei Liu, Peiying Zhang, and Keping Yu.","author":"Duan Youxiang","year":"2022","unstructured":"Youxiang Duan, Ning Chen, Ali Kashif Bashir, Mohammad Dahman Alshehri, Lei Liu, Peiying Zhang, and Keping Yu. 2022. A web knowledge-driven multimodal retrieval method in computational social systems: Unsupervised and robust graph convolutional hashing. IEEE Transactions on Computational Social Systems (2022)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448015"},{"key":"e_1_3_2_1_8_1","volume-title":"Multimodal prompt learning with missing modalities for sentiment analysis and emotion recognition. arXiv preprint arXiv:2407.05374","author":"Guo Zirun","year":"2024","unstructured":"Zirun Guo, Tao Jin, and Zhou Zhao. 2024. Multimodal prompt learning with missing modalities for sentiment analysis and emotion recognition. arXiv preprint arXiv:2407.05374 (2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"Deberta: Decoding-enhanced bert with disentangled attention. arXiv","author":"He Pengcheng","year":"2006","unstructured":"Pengcheng He, Xiaodong Liu, Jianfeng Gao, and Weizhu Chen. 2006. Deberta: Decoding-enhanced bert with disentangled attention. arXiv 2020. arXiv preprint arXiv:2006.03654 (2006)."},{"key":"e_1_3_2_1_10_1","volume-title":"Reducing the dimensionality of data with neural networks. Science 313, 5786","author":"Hinton Geoffrey E","year":"2006","unstructured":"Geoffrey E Hinton and Ruslan R Salakhutdinov. 2006. Reducing the dimensionality of data with neural networks. Science 313, 5786 (2006), 504--507."},{"key":"e_1_3_2_1_11_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems 33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems 33 (2020), 6840--6851."},{"key":"e_1_3_2_1_12_1","first-page":"1","article-title":"Root mean square error (RMSE) or mean absolute error (MAE): When to use them or not","volume":"2022","author":"Hodson Timothy O","year":"2022","unstructured":"Timothy O Hodson. 2022. Root mean square error (RMSE) or mean absolute error (MAE): When to use them or not. Geoscientific Model Development Discussions 2022 (2022), 1--10.","journal-title":"Geoscientific Model Development Discussions"},{"volume-title":"Breakthroughs in statistics: methodology and distribution","author":"Hotelling Harold","key":"e_1_3_2_1_13_1","unstructured":"Harold Hotelling. 1992. Relations between two sets of variates. In Breakthroughs in statistics: methodology and distribution. Springer, 162--190."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12065-020-00536-z"},{"key":"e_1_3_2_1_15_1","volume-title":"Retrieval- Augmented Dynamic Prompt Tuning for Incomplete Multimodal Learning. arXiv preprint arXiv:2501.01120","author":"Lang Jian","year":"2025","unstructured":"Jian Lang, Zhangtao Cheng, Ting Zhong, and Fan Zhou. 2025. Retrieval- Augmented Dynamic Prompt Tuning for Incomplete Multimodal Learning. arXiv preprint arXiv:2501.01120 (2025)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01435"},{"key":"e_1_3_2_1_17_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730--19742."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3234553"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2021.03.083"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/6365712"},{"key":"e_1_3_2_1_21_1","volume-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems 32","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_22_1","volume-title":"Maximum Likelihood Estimation for Multimodal Learning with Missing Modality. CoRR abs\/2108.10513","author":"Ma Fei","year":"2021","unstructured":"Fei Ma, Xiangxiang Xu, Shao-Lun Huang, and Lin Zhang. 2021. Maximum Likelihood Estimation for Multimodal Learning with Missing Modality. CoRR abs\/2108.10513 (2021). arXiv:2108.10513 https:\/\/arxiv.org\/abs\/2108.10513"},{"key":"e_1_3_2_1_23_1","first-page":"6881","article-title":"Trustworthy multimodal regression with mixture of normal- inverse gamma distributions","volume":"34","author":"Ma Huan","year":"2021","unstructured":"Huan Ma, Zongbo Han, Changqing Zhang, Huazhu Fu, Joey Tianyi Zhou, and Qinghua Hu. 2021. Trustworthy multimodal regression with mixture of normal- inverse gamma distributions. Advances in Neural Information Processing Systems 34 (2021), 6881--6893.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01763"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.envpol.2024.123463"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016892"},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. PmLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748--8763."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48308-5_34"},{"key":"e_1_3_2_1_29_1","volume-title":"Mamba: Multi-level aggregation via memory bank for video object detection. In Proceed- ings of the AAAI Conference on Artificial Intelligence","author":"Sun Guanxiong","year":"2021","unstructured":"Guanxiong Sun, Yang Hua, Guosheng Hu, and Neil Robertson. 2021. Mamba: Multi-level aggregation via memory bank for video object detection. In Proceed- ings of the AAAI Conference on Artificial Intelligence, Vol. 35. 2620--2627."},{"key":"e_1_3_2_1_30_1","volume-title":"Joint multimodal learning with deep generative models. arXiv preprint arXiv:1611.01891","author":"Suzuki Masahiro","year":"2016","unstructured":"Masahiro Suzuki, Kotaro Nakayama, and Yutaka Matsuo. 2016. Joint multimodal learning with deep generative models. arXiv preprint arXiv:1611.01891 (2016)."},{"key":"e_1_3_2_1_31_1","volume-title":"Marcel AJ van Gerven, and Rob van Lier","author":"Thielen Jordy","year":"2019","unstructured":"Jordy Thielen, Sander E Bosch, Tessa M van Leeuwen, Marcel AJ van Gerven, and Rob van Lier. 2019. Neuroimaging findings on amodal completion: A review. i-Perception 10, 2 (2019), 2041669519840047."},{"key":"e_1_3_2_1_32_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.528"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01524"},{"key":"e_1_3_2_1_35_1","volume-title":"International conference on machine learning. PMLR, 1083--1092","author":"Wang Weiran","year":"2015","unstructured":"Weiran Wang, Raman Arora, Karen Livescu, and Jeff Bilmes. 2015. On deep multi-view representation learning. In International conference on machine learning. PMLR, 1083--1092."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02013"},{"key":"e_1_3_2_1_37_1","first-page":"17117","article-title":"Incomplete multimodality-diffused emotion recognition","volume":"36","author":"Wang Yuanzhi","year":"2023","unstructured":"Yuanzhi Wang, Yong Li, and Zhen Cui. 2023. Incomplete multimodality-diffused emotion recognition. Advances in Neural Information Processing Systems 36 (2023), 17117--17128.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"37","author":"Woo Sangmin","year":"2023","unstructured":"Sangmin Woo, Sumin Lee, Yeonju Park, Muhammad Adi Nugroho, and Changick Kim. 2023. Towards Good Practices for Missing Modality Robust Action Recog- nition. In Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 37."},{"key":"e_1_3_2_1_39_1","volume-title":"Deep multimodal learning with missing modality: A survey. arXiv preprint arXiv:2409.07825","author":"Wu Renjie","year":"2024","unstructured":"Renjie Wu, Hu Wang, Hsiang-Ting Chen, and Gustavo Carneiro. 2024. Deep multimodal learning with missing modality: A survey. arXiv preprint arXiv:2409.07825 (2024)."},{"key":"e_1_3_2_1_40_1","volume-title":"A decision support system in precision medicine: contrastive multimodal learning for patient stratification. Annals of Operations Research","author":"Yin Qing","year":"2023","unstructured":"Qing Yin, Linda Zhong, Yunya Song, Liang Bai, Zhihua Wang, Chen Li, Yida Xu, and Xian Yang. 2023. A decision support system in precision medicine: contrastive multimodal learning for patient stratification. Annals of Operations Research (2023), 1--29."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.343"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/MIS.2016.94"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1208"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3532064"},{"key":"e_1_3_2_1_45_1","unstructured":"Qingyang Zhang Yake Wei Zongbo Han Huazhu Fu Xi Peng Cheng Deng Qinghua Hu Cai Xu Jie Wen Di Hu et al. 2024. Multimodal fusion on low-quality data: A comprehensive survey. arXiv preprint arXiv:2404.18947 (2024)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1080\/03610926.2018.1465081"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3674501"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.203"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671462"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3093397"}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Chicago IL USA","acronym":"ICMR '25"},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3733498","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:04:36Z","timestamp":1755749076000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3733498"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":50,"alternative-id":["10.1145\/3731715.3733498","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3733498","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}