{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T13:24:02Z","timestamp":1773840242519,"version":"3.50.1"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032051813","type":"print"},{"value":"9783032051820","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T00:00:00Z","timestamp":1758153600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T00:00:00Z","timestamp":1758153600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-05182-0_28","type":"book-chapter","created":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T00:00:51Z","timestamp":1758153651000},"page":"280-290","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Learning Contrastive Multimodal Fusion with\u00a0Improved Modality Dropout for\u00a0Disease Detection and\u00a0Prediction"],"prefix":"10.1007","author":[{"given":"Yi","family":"Gu","sequence":"first","affiliation":[]},{"given":"Kuniaki","family":"Saito","sequence":"additional","affiliation":[]},{"given":"Jiaxin","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,18]]},"reference":[{"key":"28_CR1","doi-asserted-by":"publisher","unstructured":"Chen, B., et\u00a0al.: A unified model for longitudinal multi-modal multi-view prediction with missingness. In: MICCAI, pp. 410\u2013420 (2024). 
https:\/\/doi.org\/10.1007\/978-3-031-72390-2_39","DOI":"10.1007\/978-3-031-72390-2_39"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Ding, X., et\u00a0al.: HiA: towards Chinese multimodal LLMs for comparative high-resolution joint diagnosis. In: MICCAI, pp. 575\u2013586 (2024)","DOI":"10.1007\/978-3-031-72390-2_54"},{"key":"28_CR3","doi-asserted-by":"crossref","unstructured":"Feng, Y., et\u00a0al.: Unified multi-modal learning for any modality combinations in Alzheimer\u2019s disease diagnosis. In: MICCAI, pp. 487\u2013497 (2024)","DOI":"10.1007\/978-3-031-72384-1_46"},{"key":"28_CR4","doi-asserted-by":"publisher","unstructured":"Gao, Y., et\u00a0al.: MEDBind: unifying language and multimodal medical data embeddings. In: MICCAI, pp. 218\u2013228 (2024). https:\/\/doi.org\/10.1007\/978-3-031-72390-2_21","DOI":"10.1007\/978-3-031-72390-2_21"},{"key":"28_CR5","doi-asserted-by":"publisher","unstructured":"Gorishniy, Y., Rubachev, I., Khrulkov, V., Babenko, A.: Revisiting deep learning models for tabular data. In: NeurIPS (2021). https:\/\/doi.org\/10.1145\/3704728","DOI":"10.1145\/3704728"},{"key":"28_CR6","doi-asserted-by":"publisher","unstructured":"Grzeszczyk, M.K., et\u00a0al.: TabAttention: learning attention conditionally on tabular data. In: MICCAI, pp. 347\u2013357 (2023). https:\/\/doi.org\/10.1007\/978-3-031-43990-2_33","DOI":"10.1007\/978-3-031-43990-2_33"},{"key":"28_CR7","doi-asserted-by":"publisher","unstructured":"Hager, P., et\u00a0al.: Best of both worlds: multimodal contrastive learning with tabular and imaging data. In: CVPR, pp. 23924\u201323935, June 2023. 
https:\/\/doi.org\/10.1109\/CVPR52729.2023.02291","DOI":"10.1109\/CVPR52729.2023.02291"},{"issue":"1","key":"28_CR8","doi-asserted-by":"publisher","first-page":"22147","DOI":"10.1038\/s41598-020-78888-w","volume":"10","author":"SC Huang","year":"2020","unstructured":"Huang, S.C., et al.: Multimodal fusion with deep neural networks for leveraging CT imaging and electronic health record: a case-study in pulmonary embolism detection. Sci. Rep. 10(1), 22147 (2020)","journal-title":"Sci. Rep."},{"key":"28_CR9","doi-asserted-by":"publisher","unstructured":"Huang, S.C., et\u00a0al.: PENet\u2014a scalable deep-learning model for automated diagnosis of pulmonary embolism using volumetric CT imaging. npj Digit. Med. 3(1), 1\u20139 (2020). https:\/\/doi.org\/10.1038\/s41746-020-0266-y","DOI":"10.1038\/s41746-020-0266-y"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Hussen\u00a0Abdelaziz, A.O.: Modality dropout for improved performance-driven talking faces. In: ICMI, pp. 378\u2013386, October 2020","DOI":"10.1145\/3382507.3418840"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Jain, K., et\u00a0al.: MMBCD: multimodal breast cancer detection from mammograms with clinical history. In: MICCAI, pp. 144\u2013154 (2024)","DOI":"10.1007\/978-3-031-72378-0_14"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Jiang, B., et\u00a0al.: MGDR: multi-modal graph disentangled representation for brain disease prediction. In: MICCAI, pp. 302\u2013312 (2024)","DOI":"10.1007\/978-3-031-72069-7_29"},{"key":"28_CR13","doi-asserted-by":"publisher","first-page":"18661","DOI":"10.5555\/3495724.3497291","volume":"33","author":"P Khosla","year":"2020","unstructured":"Khosla, P., et al.: Supervised contrastive learning. In: NeurIPS. 33, 18661\u201318673 (2020). https:\/\/doi.org\/10.5555\/3495724.3497291","journal-title":"Supervised contrastive learning. 
In: NeurIPS."},{"key":"28_CR14","doi-asserted-by":"publisher","unstructured":"Kim, D., et\u00a0al.: Learning cross-modal contrastive features for video domain adaptation. In: ICCV, pp. 13598\u201313607, October 2021. https:\/\/doi.org\/10.1109\/ICCV48922.2021.01336","DOI":"10.1109\/ICCV48922.2021.01336"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Kim, K., Lee, Y., Park, D., Eo, T., Youn, D., Lee, H., Hwang, D.: LLM-guided multi-modal multiple instance learning for 5-year overall survival prediction of lung cancer. In: MICCAI, pp. 239\u2013249 (2024)","DOI":"10.1007\/978-3-031-72384-1_23"},{"key":"28_CR16","doi-asserted-by":"publisher","unstructured":"Krishna, G., et\u00a0al.: Modality drop-out for multimodal device directed speech detection using verbal and non-verbal features. In: ICASSP, pp. 8240\u20138244, April 2024. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10446421","DOI":"10.1109\/ICASSP48485.2024.10446421"},{"key":"28_CR17","doi-asserted-by":"publisher","unstructured":"Lee, Y.L., et\u00a0al.: Multimodal prompting with missing modalities for visual recognition. In: CVPR, pp. 14943\u201314952, June 2023. https:\/\/doi.org\/10.1109\/CVPR52729.2023.01435","DOI":"10.1109\/CVPR52729.2023.01435"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Liu, S., et\u00a0al.: Multi-modal data fusion with missing data handling for mild cognitive impairment progression prediction. In: MICCAI, pp. 293\u2013302 (2024)","DOI":"10.1007\/978-3-031-72384-1_28"},{"key":"28_CR19","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2019)"},{"key":"28_CR20","unstructured":"Lundberg, S.M., Lee, S.I.: A unified approach to interpreting model predictions. In: NeurIPS, vol.\u00a030. Curran Associates, Inc. (2017)"},{"key":"28_CR21","unstructured":"McDermott, M., et\u00a0al.: A closer look at auroc and auprc under class imbalance. In: NeurIPS, vol.\u00a037, pp. 
44102\u201344163 (2024)"},{"key":"28_CR22","unstructured":"National Lung Screening Trial Research Team: Data from the National Lung Screening Trial (NLST). The Cancer Imaging Archive (2013)"},{"issue":"8","key":"28_CR23","doi-asserted-by":"publisher","first-page":"1692","DOI":"10.1109\/TPAMI.2015.2461544","volume":"38","author":"N Neverova","year":"2016","unstructured":"Neverova, N., et al.: ModDrop: adaptive multi-modal gesture recognition. IEEE TPAMI 38(8), 1692\u20131706 (2016). https:\/\/doi.org\/10.1109\/TPAMI.2015.2461544","journal-title":"IEEE TPAMI"},{"key":"28_CR24","doi-asserted-by":"publisher","unstructured":"P\u00f6lsterl, S., et\u00a0al.: Combining 3D image and tabular data via the dynamic affine feature map transform. In: MICCAI, pp. 688\u2013698 (2021). https:\/\/doi.org\/10.1007\/978-3-030-87240-3_66","DOI":"10.1007\/978-3-030-87240-3_66"},{"key":"28_CR25","doi-asserted-by":"crossref","unstructured":"Qi, A., et\u00a0al.: Multimodal emotion recognition with vision-language prompting and modality dropout. In: Proceedings of the 2nd International Workshop on Multimodal and Responsible Affective Computing, pp. 49\u201353 (Oct 2024)","DOI":"10.1145\/3689092.3689401"},{"issue":"1","key":"28_CR26","doi-asserted-by":"publisher","first-page":"3404","DOI":"10.1038\/s41467-022-31037-5","volume":"13","author":"S Qiu","year":"2022","unstructured":"Qiu, S., et al.: Multimodal deep learning for Alzheimer\u2019s disease dementia assessment. Nat. Commun. 13(1), 3404 (2022). https:\/\/doi.org\/10.1038\/s41467-022-31037-5","journal-title":"Nat. Commun."},{"key":"28_CR27","doi-asserted-by":"crossref","unstructured":"Qu, L., et\u00a0al.: Multi-modal data binding for survival analysis modeling with incomplete data and annotations. In: MICCAI, pp. 501\u2013510 (2024)","DOI":"10.1007\/978-3-031-72086-4_47"},{"key":"28_CR28","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: ICML, July 2021. 
https:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"28_CR29","doi-asserted-by":"publisher","unstructured":"Robinet, L., et\u00a0al.: DRIM: learning disentangled representations from incomplete multimodal healthcare data. In: MICCAI, pp. 163\u2013173 (2024). https:\/\/doi.org\/10.1007\/978-3-031-72384-1_16","DOI":"10.1007\/978-3-031-72384-1_16"},{"key":"28_CR30","doi-asserted-by":"crossref","unstructured":"Xiong, C., et\u00a0al.: MoME: mixture of multimodal experts for cancer survival prediction. In: MICCAI, pp. 318\u2013328 (2024)","DOI":"10.1007\/978-3-031-72083-3_30"},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Xiong, Z., et\u00a0al.: Multi-modality 3D CNN transformer for assisting clinical decision in intracerebral hemorrhage. In: MICCAI, pp. 522\u2013531 (2024)","DOI":"10.1007\/978-3-031-72086-4_49"},{"key":"28_CR32","doi-asserted-by":"publisher","unstructured":"Xu, J., et\u00a0al.: Temporal neighboring multi-modal transformer with missingness-aware prompt for hepatocellular carcinoma prediction. In: MICCAI, pp. 79\u201388 (2024). https:\/\/doi.org\/10.1007\/978-3-031-72378-0_8","DOI":"10.1007\/978-3-031-72378-0_8"},{"key":"28_CR33","unstructured":"Yang, L., et\u00a0al.: Advancing multimodal medical capabilities of gemini. arXiv preprint arXiv:2405.03162 (2024)"},{"key":"28_CR34","unstructured":"Yu, J., et\u00a0al.: Coca: contrastive captioners are image-text foundation models. TMLR (2022). https:\/\/openreview.net\/forum?id=Ee277P3AYC"},{"key":"28_CR35","doi-asserted-by":"publisher","unstructured":"Zhai, X., et\u00a0al.: Sigmoid loss for language image pre-training. In: ICCV, pp. 11941\u201311952, October 2023. https:\/\/doi.org\/10.1109\/ICCV51070.2023.01100","DOI":"10.1109\/ICCV51070.2023.01100"},{"key":"28_CR36","doi-asserted-by":"crossref","unstructured":"Zhang, S., et al.: M2Fusion: multi-time multimodal fusion for prediction of pathological complete response in breast cancer. In: MICCAI, pp. 
458\u2013468 (2024)","DOI":"10.1007\/978-3-031-72086-4_43"},{"key":"28_CR37","doi-asserted-by":"publisher","unstructured":"Zhou, Q., et\u00a0al.: PathM3: a multimodal multi-task multiple instance learning framework for whole slide image classification and captioning. In: MICCAI, pp. 373\u2013383 (2024). https:\/\/doi.org\/10.1007\/978-3-031-72083-3_35","DOI":"10.1007\/978-3-031-72083-3_35"},{"key":"28_CR38","doi-asserted-by":"publisher","unstructured":"Zhou, Y., et\u00a0al.: RadFusion: Benchmarking Performance and Fairness for Multimodal Pulmonary Embolism Detection from CT and EHR, November 2021. https:\/\/doi.org\/10.48550\/arXiv.2111.11665","DOI":"10.48550\/arXiv.2111.11665"},{"key":"28_CR39","doi-asserted-by":"publisher","unstructured":"Zou, H., Hastie, T.: Regularization and Variable Selection Via the Elastic Net. J. R. Stat. Soc. Ser. B Methodol. 67(2), 301\u2013320 (2005). https:\/\/doi.org\/10.1111\/j.1467-9868.2005.00503.x","DOI":"10.1111\/j.1467-9868.2005.00503.x"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-05182-0_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T00:01:01Z","timestamp":1758153661000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-05182-0_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,18]]},"ISBN":["9783032051813","9783032051820"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-05182-0_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,18]]},"assertion":[{"value":"18 September 
2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}