{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T12:56:29Z","timestamp":1777380989802,"version":"3.51.4"},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100014103","name":"Key Technology Research and Development Program of Shandong","doi-asserted-by":"publisher","award":["2023CXGC011101"],"award-info":[{"award-number":["2023CXGC011101"]}],"id":[{"id":"10.13039\/100014103","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008118","name":"Shandong Academy of Sciences","doi-asserted-by":"publisher","award":["2024ZDZX08"],"award-info":[{"award-number":["2024ZDZX08"]}],"id":[{"id":"10.13039\/501100008118","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Array"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.array.2026.100736","type":"journal-article","created":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T01:10:10Z","timestamp":1773969010000},"page":"100736","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Hybrid fusion framework for multimodal data integration"],"prefix":"10.1016","volume":"30","author":[{"given":"Di","family":"Wang","sequence":"first","affiliation":[]},{"given":"Wenjian","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Nie","sequence":"additional","affiliation":[]},{"given":"Yuankun","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Xueyan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Wanlei","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.array.2026.100736_b1","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102690","article-title":"Review of multimodal machine learning approaches in healthcare","volume":"114","author":"Krones","year":"2025","journal-title":"Inf Fusion"},{"key":"10.1016\/j.array.2026.100736_b2","series-title":"Navigating the future of healthcare","author":"Musunuri","year":"2025"},{"issue":"1","key":"10.1016\/j.array.2026.100736_b3","doi-asserted-by":"crossref","first-page":"3382","DOI":"10.1038\/s41467-024-47737-z","article-title":"A patient-specific lung cancer assembloid model with heterogeneous tumor microenvironments","volume":"15","author":"Zhang","year":"2024","journal-title":"Nat Commun"},{"key":"10.1016\/j.array.2026.100736_b4","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.102069","article-title":"EHR-KnowGen: Knowledge-enhanced multimodal learning for disease diagnosis generation","volume":"102","author":"Niu","year":"2024","journal-title":"Inf 
Fusion"},{"key":"10.1016\/j.array.2026.100736_b5","doi-asserted-by":"crossref","unstructured":"Pham TT, Brecheisen J, Nguyen A, Nguyen H, Le N. I-AI: A Controllable & Interpretable AI System for Decoding Radiologists\u2019 Intense Focus for Accurate CXR Diagnoses. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision. 2024, p. 7850\u20139.","DOI":"10.1109\/WACV57701.2024.00767"},{"key":"10.1016\/j.array.2026.100736_b6","article-title":"Has multimodal learning delivered universal intelligence in healthcare? A comprehensive survey","author":"Lin","year":"2024","journal-title":"Inf Fusion"},{"issue":"1","key":"10.1016\/j.array.2026.100736_b7","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1038\/s41746-022-00712-8","article-title":"Multimodal machine learning in precision health: A scoping review","volume":"5","author":"Kline","year":"2022","journal-title":"npj Digit Med"},{"key":"10.1016\/j.array.2026.100736_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.ins.2023.119854","article-title":"CheXMed: A multimodal learning algorithm for pneumonia detection in the elderly","volume":"654","author":"Ren","year":"2024","journal-title":"Inform Sci"},{"key":"10.1016\/j.array.2026.100736_b9","first-page":"1","article-title":"Exploring scalable medical image encoders beyond text supervision","author":"P\u00e9rez-Garc\u00eda","year":"2025","journal-title":"Nat Mach Intell"},{"issue":"1","key":"10.1016\/j.array.2026.100736_b10","doi-asserted-by":"crossref","DOI":"10.1056\/AIoa2400640","article-title":"A multimodal biomedical foundation model trained from fifteen million image\u2013text pairs","volume":"2","author":"Zhang","year":"2025","journal-title":"NEJM AI"},{"key":"10.1016\/j.array.2026.100736_b11","article-title":"Efficient medical vision-language alignment through adapting masked vision models","author":"Lian","year":"2025","journal-title":"IEEE Trans Med Imaging"},{"key":"10.1016\/j.array.2026.100736_b12","doi-asserted-by":"crossref","first-page":"4706","DOI":"10.1109\/TMM.2023.3325965","article-title":"Multi-task paired masking with alignment modeling for medical vision-language pre-training","volume":"26","author":"Zhang","year":"2023","journal-title":"IEEE Trans Multimed"},{"key":"10.1016\/j.array.2026.100736_b13","series-title":"International conference on algorithms and architectures for parallel processing","first-page":"145","article-title":"Modal-centric insights into multimodal federated learning for smart healthcare: A survey","author":"Wang","year":"2024"},{"key":"10.1016\/j.array.2026.100736_b14","series-title":"BIOCOMPUTING 2021: proceedings of the Pacific symposium","first-page":"232","article-title":"CheXclusion: Fairness gaps in deep chest X-ray classifiers","author":"Seyyed-Kalantari","year":"2020"},{"key":"10.1016\/j.array.2026.100736_b15","doi-asserted-by":"crossref","DOI":"10.1016\/j.cmpb.2024.108198","article-title":"Attentional decoder networks for chest X-ray image recognition on high-resolution features","volume":"251","author":"Kang","year":"2024","journal-title":"Comput Methods Programs Biomed"},{"key":"10.1016\/j.array.2026.100736_b16","doi-asserted-by":"crossref","unstructured":"Lu Y, Hu Y, Li L, Xu Z, Liu H, Liang H, Fu X. CvTGNet: A Novel Framework for Chest X-Ray Multi-label Classification. In: Proceedings of the 21st ACM international conference on computing frontiers. 2024, p. 
12\u201320.","DOI":"10.1145\/3649153.3649216"},{"key":"10.1016\/j.array.2026.100736_b17","first-page":"1","article-title":"Multi-label chest X-ray image classification via category disentangled causal learning","author":"Li","year":"2025","journal-title":"IEEE Trans Artif Intell"},{"key":"10.1016\/j.array.2026.100736_b18","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2024.107018","article-title":"Multi-label chest X-ray image classification based on long-range dependencies capture and label relationships learning","volume":"100","author":"Zhao","year":"2025","journal-title":"Biomed Signal Process Control"},{"key":"10.1016\/j.array.2026.100736_b19","doi-asserted-by":"crossref","DOI":"10.1016\/j.metrad.2025.100172","article-title":"Intelligent analysis of chest X-ray based on multi-modal instruction tuning","author":"Yao","year":"2025","journal-title":"Meta-Radiol"},{"issue":"9","key":"10.1016\/j.array.2026.100736_b20","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3649447","article-title":"Deep multimodal data fusion","volume":"56","author":"Zhao","year":"2024","journal-title":"ACM Comput Surv"},{"issue":"5","key":"10.1016\/j.array.2026.100736_b21","first-page":"1237","article-title":"Gated convolutional transformer for multi-modal learning","volume":"45","author":"Zhou","year":"2023","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10.1016\/j.array.2026.100736_b22","first-page":"150","article-title":"Gated transformer for speech recognition","volume":"30","author":"Li","year":"2022","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"10.1016\/j.array.2026.100736_b23","article-title":"A review of deep learning-based information fusion techniques for multimodal medical image classification","author":"Li","year":"2024","journal-title":"Comput Biol Med"},{"issue":"10","key":"10.1016\/j.array.2026.100736_b24","doi-asserted-by":"crossref","first-page":"12113","DOI":"10.1109\/TPAMI.2023.3275156","article-title":"Multimodal learning with transformers: A survey","volume":"45","author":"Xu","year":"2023","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10.1016\/j.array.2026.100736_b25","doi-asserted-by":"crossref","unstructured":"Li Y, Quan R, Zhu L, Yang Y. Efficient multimodal fusion via interactive prompting. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2023, p. 
2604\u201313.","DOI":"10.1109\/CVPR52729.2023.00256"},{"issue":"3","key":"10.1016\/j.array.2026.100736_b26","doi-asserted-by":"crossref","first-page":"1712","DOI":"10.1109\/TCSVT.2023.3296745","article-title":"TUFusion: A transformer-based universal fusion algorithm for multimodal images","volume":"34","author":"Zhao","year":"2023","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"10.1016\/j.array.2026.100736_b27","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.127466","article-title":"GRAformer: A gated residual attention transformer for multivariate time series forecasting","volume":"581","author":"Yang","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.array.2026.100736_b28","series-title":"TACFN: transformer-based adaptive cross-modal fusion network for multimodal emotion recognition","author":"Liu","year":"2025"},{"key":"10.1016\/j.array.2026.100736_b29","doi-asserted-by":"crossref","first-page":"10209","DOI":"10.1007\/s00521-019-04559-1","article-title":"Gated multimodal networks","volume":"32","author":"Arevalo","year":"2020","journal-title":"Neural Comput Appl"},{"issue":"1","key":"10.1016\/j.array.2026.100736_b30","first-page":"225","article-title":"A medical multimodal multitask foundation model for lung cancer and cardiovascular risk prediction","volume":"16","author":"Lee","year":"2025","journal-title":"Nat Commun"},{"key":"10.1016\/j.array.2026.100736_b31","article-title":"CLIP in medical imaging: A survey","author":"Zhao","year":"2025","journal-title":"Med Image Anal"},{"key":"10.1016\/j.array.2026.100736_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.101968","article-title":"When CLIP meets cross-modal hashing retrieval: A new strong baseline","volume":"100","author":"Xia","year":"2023","journal-title":"Inf Fusion"},{"key":"10.1016\/j.array.2026.100736_b33","doi-asserted-by":"crossref","unstructured":"Ali M, Khan S. Clip-decoder: Zeroshot multilabel classification using multimodal clip aligned representations. In: Proceedings of the IEEE\/CVF international conference on computer vision. 2023, p. 
4675\u20139.","DOI":"10.1109\/ICCVW60793.2023.00505"},{"key":"10.1016\/j.array.2026.100736_b34","article-title":"MCPL: Multi-modal collaborative prompt learning for medical vision-language model","author":"Wang","year":"2024","journal-title":"IEEE Trans Med Imaging"},{"key":"10.1016\/j.array.2026.100736_b35","doi-asserted-by":"crossref","DOI":"10.1016\/j.compmedimag.2025.102516","article-title":"Unibrain: Universal brain mri diagnosis with hierarchical knowledge-enhanced pre-training","volume":"122","author":"Lei","year":"2025","journal-title":"Comput Med Imaging Graph"},{"key":"10.1016\/j.array.2026.100736_b36","series-title":"International conference on medical image computing and computer-assisted intervention","first-page":"420","article-title":"Xplainer: From x-ray observations to explainable zero-shot diagnosis","author":"Pellegrini","year":"2023"},{"key":"10.1016\/j.array.2026.100736_b37","series-title":"International conference on medical image computing and computer-assisted intervention","first-page":"403","article-title":"Clip-lung: Textual knowledge-guided lung nodule malignancy prediction","author":"Lei","year":"2023"},{"key":"10.1016\/j.array.2026.100736_b38","doi-asserted-by":"crossref","DOI":"10.1016\/j.compbiomed.2024.108505","article-title":"Nodule-CLIP: Lung nodule classification based on multi-modal contrastive learning","volume":"175","author":"Sun","year":"2024","journal-title":"Comput Biol Med"},{"issue":"12","key":"10.1016\/j.array.2026.100736_b39","doi-asserted-by":"crossref","first-page":"7895","DOI":"10.1007\/s00330-024-10834-0","article-title":"Class imbalance on medical image classification: towards better evaluation practices for discrimination and calibration performance","volume":"34","author":"Mosquera","year":"2024","journal-title":"Eur Radiol"},{"key":"10.1016\/j.array.2026.100736_b40","article-title":"A new benchmark: Clinical uncertainty and severity aware labeled chest X-ray images with multi-relationship graph learning","author":"Zhang","year":"2024","journal-title":"IEEE Trans Med Imaging"}],"container-title":["Array"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2590005626000597?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S2590005626000597?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T09:19:36Z","timestamp":1777367976000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S2590005626000597"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":40,"alternative-id":["S2590005626000597"],"URL":"https:\/\/doi.org\/10.1016\/j.array.2026.100736","relation":{},"ISSN":["2590-0056"],"issn-type":[{"value":"2590-0056","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Hybrid fusion framework for multimodal data integration","name":"articletitle","label":"Article Title"},{"value":"Array","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.array.2026.100736","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The 
Author(s). Published by Elsevier Inc.","name":"copyright","label":"Copyright"}],"article-number":"100736"}}
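
A note on reading this record: the object above follows the Crossref "work" message schema, with the bibliographic payload nested under "message". The sketch below shows how a record like it can be fetched and its main fields read; it assumes the public Crossref REST API at api.crossref.org (a reasonable inference from the record's "source":"Crossref" field, but not stated in the record itself), and the mailto address in the User-Agent is a hypothetical placeholder for polite-pool usage.

# Minimal sketch: fetch and read a Crossref "work" record (Python, stdlib only).
import json
import urllib.request

DOI = "10.1016/j.array.2026.100736"  # taken from the record's "DOI" field
url = f"https://api.crossref.org/works/{DOI}"

# A descriptive User-Agent with a contact address is Crossref's polite-pool
# convention; the address here is a placeholder, not a real contact.
req = urllib.request.Request(
    url, headers={"User-Agent": "example-client/0.1 (mailto:you@example.org)"}
)
with urllib.request.urlopen(req) as resp:
    work = json.load(resp)["message"]  # bibliographic payload lives under "message"

print(work["title"][0])            # "Hybrid fusion framework for multimodal data integration"
print(work["container-title"][0])  # journal title: "Array"
print(len(work.get("reference", [])))  # 40 cited references, matching "references-count"
for ref in work.get("reference", [])[:3]:
    # Each reference carries a key plus whichever fields Crossref could resolve.
    print(ref["key"], ref.get("article-title") or ref.get("series-title") or ref.get("unstructured", ""))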