{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T14:10:43Z","timestamp":1780495843673,"version":"3.54.1"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100020771","name":"Natural Science Foundation for Young Scientists of Shanxi Province","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100020771","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62576216"],"award-info":[{"award-number":["62576216"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["12326610"],"award-info":[{"award-number":["12326610"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.patcog.2026.113540","type":"journal-article","created":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T16:40:51Z","timestamp":1773938451000},"page":"113540","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Enhancing 3D medical multi-modal large language models with integrated human body priors for computed tomography"],"prefix":"10.1016","volume":"179","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-1286-919X","authenticated-orcid":false,"given":"Leilei","family":"Zeng","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1327-1315","authenticated-orcid":false,"given":"Jie","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7457-9540","authenticated-orcid":false,"given":"Wenting","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chenyang","family":"Lyu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenxi","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9535-3649","authenticated-orcid":false,"given":"Shaonan","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3916-3533","authenticated-orcid":false,"given":"Xiande","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1420-0815","authenticated-orcid":false,"given":"Linlin","family":"Shen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"9","key":"10.1016\/j.patcog.2026.113540_bib0001","doi-asserted-by":"crossref","first-page":"592","DOI":"10.1097\/RLI.0000000000000696","article-title":"Developments in X-ray contrast media and the potential impact on computed tomography","volume":"55","author":"Sch\u00f6ckel","year":"2020","journal-title":"Invest. Radiol."},{"key":"10.1016\/j.patcog.2026.113540_bib0002","series-title":"High-end global computed tomography purchases to propel the high-end CT segment revenue","author":"Fernandez","year":"2021"},{"key":"10.1016\/j.patcog.2026.113540_bib0003","series-title":"Radiology facing a global shortage","author":"Henderson","year":"2022"},{"key":"10.1016\/j.patcog.2026.113540_bib0004","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"958","article-title":"A survey on multimodal large language models for autonomous driving","author":"Cui","year":"2024"},{"key":"10.1016\/j.patcog.2026.113540_bib0005","series-title":"2023 IEEE International Conference on Big Data (BigData)","first-page":"2247","article-title":"Multimodal large language models: a survey","author":"Wu","year":"2023"},{"key":"10.1016\/j.patcog.2026.113540_bib0006","series-title":"2022 Conference on Empirical Methods in Natural Language Processing","article-title":"MedCLIP: contrastive learning from unpaired medical images and text","author":"Wang","year":"2022"},{"key":"10.1016\/j.patcog.2026.113540_bib0007","series-title":"BiomedCLIP: a multimodal biomedical foundation model pretrained from fifteen million scientific image-text pairs","author":"Zhang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113540_bib0008","series-title":"Proceedings of the 23rd Workshop on Biomedical Natural Language Processing","first-page":"440","article-title":"XrayGPT: chest radiographs summarization using large medical vision-language models","author":"Thawakar","year":"2024"},{"key":"10.1016\/j.patcog.2026.113540_bib0009","first-page":"28541","article-title":"LLaVA-med: training a large language-and-vision assistant for biomedicine in one day","volume":"36","author":"Li","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113540_bib0010","series-title":"Machine Learning for Health (ML4H)","first-page":"353","article-title":"Med-flamingo: a multimodal medical few-shot learner","author":"Moor","year":"2023"},{"key":"10.1016\/j.patcog.2026.113540_bib0011","series-title":"The 62st Annual Meeting of The Association For Computational Linguistics","article-title":"Fine-grained image-text alignment in medical imaging enables cyclic image-report generation","author":"Chen","year":"2024"},{"issue":"4","key":"10.1016\/j.patcog.2026.113540_bib0012","doi-asserted-by":"crossref","first-page":"577","DOI":"10.1016\/S0097-8493(02)00103-6","article-title":"Trends in medical imaging: from 2D to 3D","volume":"26","author":"Sakas","year":"2002","journal-title":"Compu. Graph."},{"key":"10.1016\/j.patcog.2026.113540_bib0013","series-title":"M3D: advancing 3D medical image analysis with multi-modal large language models","author":"Bai","year":"2024"},{"key":"10.1016\/j.patcog.2026.113540_bib0014","series-title":"Proceedings of the Asian Conference on Computer Vision","first-page":"2404","article-title":"MedBLIP: bootstrapping language-image pre-training from 3D medical images and texts","author":"Chen","year":"2024"},{"key":"10.1016\/j.patcog.2026.113540_bib0015","series-title":"Benchmarking and boosting radiology report generation for 3D high-resolution medical images","author":"Liu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113540_bib0016","series-title":"The Concise Human Body Book: An Illustrated Guide to Its Structure, Function and Disorders","author":"Parker","year":"2009"},{"key":"10.1016\/j.patcog.2026.113540_bib0017","doi-asserted-by":"crossref","first-page":"44","DOI":"10.1016\/j.media.2019.04.002","article-title":"Computational anatomy for multi-organ analysis in medical imaging: a review","volume":"56","author":"Cerrolaza","year":"2019","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.patcog.2026.113540_bib0018","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s12974-016-0736-y","article-title":"Multiple organ dysfunction and systemic inflammation after spinal cord injury: a complex relationship","volume":"13","author":"Sun","year":"2016","journal-title":"J. Neuroinflammation"},{"key":"10.1016\/j.patcog.2026.113540_bib0019","first-page":"34892","article-title":"Visual instruction tuning","volume":"36","author":"Liu","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113540_bib0020","series-title":"PaLM-E an embodied multimodal language model","author":"Driess","year":"2023"},{"key":"10.1016\/j.patcog.2026.113540_bib0021","first-page":"23","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113540_bib0022","series-title":"AAAI 2024 Spring Symposium on Clinical Foundation Models","article-title":"CheXagent: towards a foundation model for chest X-ray interpretation","author":"Chen","year":"2024"},{"key":"10.1016\/j.patcog.2026.113540_bib0023","series-title":"Maira-1: a specialised large multimodal model for radiology report generation","author":"Hyland","year":"2023"},{"key":"10.1016\/j.patcog.2026.113540_bib0024","series-title":"MediVLM: a vision language model for radiology report generation from medical images","first-page":"10287","volume":"2025","author":"Goswami","year":"2025"},{"key":"10.1016\/j.patcog.2026.113540_bib0025","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"755","article-title":"MedUnifier: unifying vision-and-language pre-training on medical data with vision generation task using discrete visual representations","author":"Zhang","year":"2025"},{"key":"10.1016\/j.patcog.2026.113540_bib0026","series-title":"OmniV-Med: scaling medical vision-language model for universal visual understanding","author":"Jiang","year":"2025"},{"key":"10.1016\/j.patcog.2026.113540_bib0027","series-title":"Merlin: a computed tomography vision-language foundation model and dataset","first-page":"1","author":"Blankemeier","year":"2026"},{"key":"10.1016\/j.patcog.2026.113540_bib0028","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"476","article-title":"CT2Rep: automated radiology report generation for 3d medical imaging","author":"Hamamci","year":"2024"},{"issue":"1","key":"10.1016\/j.patcog.2026.113540_bib0029","doi-asserted-by":"crossref","first-page":"7866","DOI":"10.1038\/s41467-025-62385-7","article-title":"Towards generalist foundation model for radiology by leveraging web-scale 2d&3d medical data","volume":"16","author":"Wu","year":"2025","journal-title":"Nat. Commun."},{"key":"10.1016\/j.patcog.2026.113540_bib0030","series-title":"A foundation model utilizing chest ct volumes and radiology reports for supervised-level zero-shot detection of abnormalities","author":"Hamamci","year":"2024"},{"key":"10.1016\/j.patcog.2026.113540_bib0031","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"484","article-title":"RadAlign: advancing radiology report generation with vision-language concept alignment","author":"Gu","year":"2025"},{"key":"10.1016\/j.patcog.2026.113540_bib0032","article-title":"Efficient 3D representation learning for medical image analysis","volume":"2","author":"Tang","year":"2024","journal-title":"World Sci. Annu. Rev. Artif. Intell."},{"key":"10.1016\/j.patcog.2026.113540_bib0033","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2024.103226","article-title":"Universal and extensible language-vision models for organ segmentation and tumor detection from abdominal computed tomography","volume":"97","author":"Liu","year":"2024","journal-title":"Med. Image Anal."},{"issue":"5","key":"10.1016\/j.patcog.2026.113540_bib0034","doi-asserted-by":"crossref","first-page":"8525","DOI":"10.1109\/TNNLS.2024.3409573","article-title":"Unsupervised domain adaptation for low-dose CT reconstruction via Bayesian uncertainty alignment","volume":"36","author":"Chen","year":"2024","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2026.113540_bib0035","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"97","article-title":"Edge-oriented point-cloud transformer for 3D intracranial aneurysm segmentation","author":"Liu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113540_bib0036","first-page":"36620","article-title":"AbdomenAtlas-8K: annotating 8,000 CT volumes for multi-organ segmentation in three weeks","volume":"36","author":"Qu","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"3","key":"10.1016\/j.patcog.2026.113540_bib0037","doi-asserted-by":"crossref","DOI":"10.1148\/radiol.231362","article-title":"Potential of ChatGPT and GPT -4 for data mining of free-text CT reports on lung cancer","volume":"308","author":"Fink","year":"2023","journal-title":"Radiology"},{"issue":"9","key":"10.1016\/j.patcog.2026.113540_bib0038","doi-asserted-by":"crossref","first-page":"3786","DOI":"10.1109\/TNNLS.2021.3099165","article-title":"Medical-VLBERT: medical visual language BERT for COVID-19 CT report generation with alternate learning","volume":"32","author":"Liu","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"3","key":"10.1016\/j.patcog.2026.113540_bib0039","doi-asserted-by":"crossref","first-page":"1375","DOI":"10.1007\/s11263-024-02246-w","article-title":"Bi-VLGM: Bi-level class-severity-aware vision-language graph matching for text guided medical image segmentation","volume":"133","author":"Chen","year":"2025","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.patcog.2026.113540_bib0040","series-title":"2021 IEEE 18th International Symposium on Biomedical Imaging (ISBI)","first-page":"1650","article-title":"SLAKE: a semantically-labeled knowledge-enhanced dataset for medical visual question answering","author":"Liu","year":"2021"},{"key":"10.1016\/j.patcog.2026.113540_bib0041","series-title":"RadGenome-Chest CT: a grounded vision-language dataset for chest ct analysis","author":"Zhang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113540_bib0042","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"21","article-title":"CLIP-driven universal model for organ segmentation and tumor detection","author":"Liu","year":"2023"},{"key":"10.1016\/j.patcog.2026.113540_bib0043","first-page":"110","article-title":"SegVol: universal and interactive volumetric medical image segmentation","volume":"37","author":"Du","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113540_bib0044","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.112179","article-title":"SegMIC: a universal model for medical image segmentation through in-context learning","volume":"171","author":"Zhao","year":"2026","journal-title":"Pattern Recognit"},{"key":"10.1016\/j.patcog.2026.113540_bib0045","first-page":"840","article-title":"Show and segment: universal medical image segmentation via in-context learning","volume":"20","author":"Gao","year":"2025","journal-title":"Proceedings of the Computer Vision and Pattern Recognition Conference"},{"issue":"1","key":"10.1016\/j.patcog.2026.113540_bib0046","doi-asserted-by":"crossref","DOI":"10.1038\/s41746-025-01964-w","article-title":"Large-vocabulary segmentation for medical images with text prompts","volume":"8","author":"Zhao","year":"2025","journal-title":"npj Digit. Med."},{"issue":"1","key":"10.1016\/j.patcog.2026.113540_bib0047","doi-asserted-by":"crossref","first-page":"654","DOI":"10.1038\/s41467-024-44824-z","article-title":"Segment anything in medical images","volume":"15","author":"Ma","year":"2024","journal-title":"Nat. Commun."},{"issue":"10","key":"10.1016\/j.patcog.2026.113540_bib0048","doi-asserted-by":"crossref","first-page":"17599","DOI":"10.1109\/TNNLS.2025.3586694","article-title":"SAM-Med3D: a vision foundation model for general-purpose segmentation on volumetric medical images","volume":"36","author":"Wang","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2026.113540_bib0049","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"4015","article-title":"Segment anything","author":"Kirillov","year":"2023"},{"key":"10.1016\/j.patcog.2026.113540_bib0050","first-page":"36","article-title":"AMOS: a large-scale abdominal multi-organ benchmark for versatile medical image segmentation","volume":"35","author":"Ji","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113540_bib0051","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2024.103370","article-title":"MedLSAM: localize and segment anything model for 3D CT images","volume":"99","author":"Lei","year":"2025","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.patcog.2026.113540_bib0052","volume":"2","author":"Cheng","year":"2023","journal-title":"Sam-med2d"},{"key":"10.1016\/j.patcog.2026.113540_bib0053","series-title":"GPT-4 technical report","author":"Achiam","year":"2023"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326005066?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326005066?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T13:10:53Z","timestamp":1780492253000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326005066"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":53,"alternative-id":["S0031320326005066"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113540","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Enhancing 3D medical multi-modal large language models with integrated human body priors for computed tomography","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113540","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113540"}}