{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T12:14:34Z","timestamp":1775564074515,"version":"3.50.1"},"reference-count":43,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"vor","delay-in-days":64,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001807","name":"FAPESP","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001807","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002322","name":"CAPES","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003593","name":"CNPq","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003758","name":"FAPEMA","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003758","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Procedia Computer Science"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1016\/j.procs.2026.03.082","type":"journal-article","created":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T12:39:40Z","timestamp":1774355980000},"page":"1045-1053","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Relational Memory Multimodal Models for Automated X-Ray Medical Reporting"],"prefix":"10.1016","volume":"278","author":[{"suffix":"Junior","given":"Gilvan Veras","family":"Magalh\u00e3es","sequence":"first","affiliation":[]},{"given":"Pedro","family":"de Alc\u00e2ntara dos Santos Neto","sequence":"additional","affiliation":[]},{"given":"Anselmo Cardoso","family":"de Paiva","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.procs.2026.03.082_bib1","doi-asserted-by":"crossref","first-page":"103224","DOI":"10.1016\/j.media.2024.103224","article-title":"Towards long-tailed, multi-label disease classification from chest X-ray: Overview of the CXR-LT challenge","volume":"97","author":"Holste","year":"2024","journal-title":"Medical Image Analysis"},{"key":"10.1016\/j.procs.2026.03.082_bib2","doi-asserted-by":"crossref","first-page":"100557","DOI":"10.1016\/j.imu.2021.100557","article-title":"Automated radiology report generation using conditioned transformers","volume":"24","author":"Alfarghaly","year":"2021","journal-title":"Informatics in Medicine Unlocked"},{"key":"10.1016\/j.procs.2026.03.082_bib3","doi-asserted-by":"crossref","first-page":"107856","DOI":"10.1016\/j.patcog.2021.107856","article-title":"Automatic medical image interpretation: State of the art and future directions","volume":"114","author":"Ayesha","year":"2021","journal-title":"Pattern Recognition"},{"issue":"1","key":"10.1016\/j.procs.2026.03.082_bib4","doi-asserted-by":"crossref","first-page":"102","DOI":"10.1067\/j.cpradiol.2023.04.001","article-title":"Ability of ChatGPT to generate competent radiology reports for distal radius fracture by use of RSNA template items and integrated AO classifier","volume":"53","author":"Bosbach","year":"2023","journal-title":"Current Problems in Diagnostic Radiology"},{"key":"10.1016\/j.procs.2026.03.082_bib5","doi-asserted-by":"crossref","first-page":"102342","DOI":"10.1016\/j.compmedimag.2024.102342","article-title":"Medical report generation based on multimodal federated learning","volume":"113","author":"Chen","year":"2024","journal-title":"Computerized Medical Imaging and Graphics"},{"key":"10.1016\/j.procs.2026.03.082_bib6","doi-asserted-by":"crossref","first-page":"102823","DOI":"10.1016\/j.artmed.2024.102823","article-title":"A label information fused medical image report generation framework","volume":"150","author":"Sun","year":"2024","journal-title":"Artificial Intelligence in Medicine"},{"key":"10.1016\/j.procs.2026.03.082_bib7","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., & Guo, B. (2021). Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. arXiv (Cornell University). https:\/\/doi.org\/10.48550\/arxiv.2103.14030","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"10.1016\/j.procs.2026.03.082_bib8","doi-asserted-by":"crossref","unstructured":"Lovelace, J., & Mortazavi, B. (2020). Learning to generate clinically coherent chest X-ray reports. In T. Cohn, Y. He, & Y. Liu (Eds.), Findings of the Association for Computational Linguistics: EMNLP 2020 (pp. 1235\u20131243). Association for Computational Linguistics. https:\/\/aclanthology.org\/2020.findings-emnlp.110\/ https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.110","DOI":"10.18653\/v1\/2020.findings-emnlp.110"},{"key":"10.1016\/j.procs.2026.03.082_bib9","doi-asserted-by":"crossref","unstructured":"Agarwal, S., Yashwanth, D., Arya, K. V., Rodriguez, R. C., & Lezama, P. (2025). Enhanced Bidirectional GRUs with Self-attention for Medical Image Report Generation: A Deep Learning Approach. In Lecture notes in networks and systems (pp. 71\u201379). https:\/\/doi.org\/10.1007\/978-981-97-3859-5_7","DOI":"10.1007\/978-981-97-3859-5_7"},{"key":"10.1016\/j.procs.2026.03.082_bib10","doi-asserted-by":"crossref","unstructured":"Mohan, Y. R., Satyanarayana, D., Babu, R. S., Ahmed, K. A., & Satyanarayana, M. S. (2025). Deep Convolutional Neural Network based Solution for Detection of COVID-19 from Chest X-Ray Images. In Advances in engineering research\/Advances in Engineering Research (pp. 283\u2013298). https:\/\/doi.org\/10.2991\/978-94-6463-662-8_23","DOI":"10.2991\/978-94-6463-662-8_23"},{"key":"10.1016\/j.procs.2026.03.082_bib11","series-title":"Tuberculosis identification and detection application using deep learning\u2014cloud based web. In Applications of Mathematics in Science and Technology (pp. 240-247)","author":"Rajeswari","year":"2026"},{"key":"10.1016\/j.procs.2026.03.082_bib12","series-title":"Paediatric Pneumonia chest X-ray image classification with association to Lung cancer disease using ResNet50 Deep Learning Model. In 2024 IEEE International Conference on Big Data (BigData) (pp. 8859-8861)","author":"Onah","year":"2024"},{"key":"10.1016\/j.procs.2026.03.082_bib13","series-title":"A Hybrid CNN-RNN Model for Enhanced Pneumonia Detection using X-Ray Imaging. In 2024 First International Conference on Software, Systems and Information Technology (SSITCON) (pp. 1-5)","author":"Mohammad","year":"2024"},{"key":"10.1016\/j.procs.2026.03.082_bib14","series-title":"Natural language processing for chest X\u2010ray reports in the transformer era: BERT\u2010like encoders for comprehension and GPT\u2010like decoders for generation","author":"Yuan","year":"2025"},{"issue":"2","key":"10.1016\/j.procs.2026.03.082_bib15","doi-asserted-by":"crossref","first-page":"e70062","DOI":"10.1002\/ima.70062","article-title":"Generating Medical Reports With a Novel Deep Learning Architecture","volume":"35","author":"Ucan","year":"2025","journal-title":"International Journal of Imaging Systems and Technology"},{"key":"10.1016\/j.procs.2026.03.082_bib16","doi-asserted-by":"crossref","first-page":"109708","DOI":"10.1016\/j.compbiomed.2025.109708","article-title":"ATEDU-NET: An Attention-Embedded Deep Unet for multi-disease diagnosis in chest X-ray images, breast ultrasound, and retina fundus","volume":"186","author":"Ejiyi","year":"2025","journal-title":"Computers in Biology and Medicine"},{"key":"10.1016\/j.procs.2026.03.082_bib17","series-title":"Improving the CXR Reports Generation with Multi-modal feature Alignment and Self-Refining strategy. In 2024 3rd International Conference on Embedded Systems and Artificial Intelligence (ESAI) (pp. 1-7)","author":"Cheddi","year":"2024"},{"key":"10.1016\/j.procs.2026.03.082_bib18","doi-asserted-by":"crossref","unstructured":"Zhou, L. (2024, October). Automated Medical Report Generation and Visual Question Answering. In Proceedings of the 1st International Workshop on Multimedia Computing for Health and Medicine (pp. 3-4).","DOI":"10.1145\/3688868.3689189"},{"key":"10.1016\/j.procs.2026.03.082_bib19","series-title":"Combining CNN and Transformer for Enhancing Medical Image Captioning. In 2024 Sixth International Conference on Intelligent Computing in Data Sciences (ICDS) (pp. 1-5)","author":"El Medhoune","year":"2024"},{"key":"10.1016\/j.procs.2026.03.082_bib20","doi-asserted-by":"crossref","unstructured":"Veras Magalh\u00e3es, G., L de S Santos, R., H S Vogado, L., Cardoso de Paiva, A., & de Alc\u00e2ntara Dos Santos Neto, P. (2024). XRaySwinGen: Automatic medical reporting for X-ray exams with multimodal model. Heliyon, 10(7), e27516. https:\/\/doi.org\/10.1016\/j.heliyon.2024.e27516","DOI":"10.1016\/j.heliyon.2024.e27516"},{"key":"10.1016\/j.procs.2026.03.082_bib21","unstructured":"Islam, M. R., Hossain, M. Z., Ahmed, M., & Samu, M. S. S. (2025). Vision-Language models for automated chest x-ray interpretation: leveraging VIT and GPT-2. arXiv (Cornell University). https:\/\/doi.org\/10.48550\/arxiv.2501.12356"},{"key":"10.1016\/j.procs.2026.03.082_bib22","doi-asserted-by":"crossref","unstructured":"Singh, P., & Singh, S. (2025). ChestX-Transcribe: a multimodal transformer for automated radiology report generation from chest x-rays. Frontiers in Digital Health, 7. https:\/\/doi.org\/10.3389\/fdgth.2025.1535168","DOI":"10.3389\/fdgth.2025.1535168"},{"key":"10.1016\/j.procs.2026.03.082_bib23","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1007\/978-981-99-8628-6_20","article-title":"Transformer-Based Chest X-ray Report Generation Model Check for updates","volume":"1","author":"Shaikh","year":"2024","journal-title":"Soft Computing and Signal Processing: Proceedings of 6th ICSCSP 2023, Volume 1"},{"key":"10.1016\/j.procs.2026.03.082_bib24","doi-asserted-by":"crossref","unstructured":"Nimalsiri, W., Hennayake, M., Rathnayake, K., Ambegoda, T. D., & Meedeniya, D. (2023). Automated Radiology Report Generation Using Transformers (pp. 90\u201395). IEEE. https:\/\/doi.org\/10.1109\/icarc57651.2023.10145699","DOI":"10.1109\/ICARC57651.2023.10145699"},{"key":"10.1016\/j.procs.2026.03.082_bib25","series-title":"Quantitative Evaluation of Multimodal LLMs in Pediatric Radiology Report Generation","author":"Ding","year":"2026"},{"key":"10.1016\/j.procs.2026.03.082_bib26","series-title":"MetaGP: A Generative Foundation Model Integrating Electronic Health Records and Multimodal Imaging for Addressing Unmet Clinical Needs","author":"Liu","year":"2026"},{"key":"10.1016\/j.procs.2026.03.082_bib27","series-title":"Utility of Multimodal Large Language Models in Analyzing Chest X-ray With Incomplete Contextual Information. In C22. ARTIFICIAL INTELLIGENCE IN THE ICU: THE MACHINE WILL SEE YOU NOW (pp. A5061-A5061)","author":"Yoon","year":"2024"},{"key":"10.1016\/j.procs.2026.03.082_bib28","unstructured":"Johnson, A., Lungren, M., Peng, Y., Lu, Z., Mark, R., Berkowitz, S., & Horng, S. (2024). MIMIC-CXR-JPG - chest radiographs with structured labels (version 2.1.0). PhysioNet. https:\/\/doi.org\/10.13026\/jsn5-t979."},{"key":"10.1016\/j.procs.2026.03.082_bib29","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., \u2026 & Houlsby, N. (2020). An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929."},{"key":"10.1016\/j.procs.2026.03.082_bib30","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., Desmaison, A., K\u00f6pf, A., Yang, E., DeVito, Z., Raison, M., Tejani, A., Chilamkurthy, S., Steiner, B., Fang, L., \u2026 Chintala, S. (2019). PyTorch: An Imperative Style, High-Performance Deep Learning Library. arXiv (Cornell University). https:\/\/doi.org\/10.48550\/arxiv.1912.01703"},{"key":"10.1016\/j.procs.2026.03.082_bib31","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980."},{"key":"10.1016\/j.procs.2026.03.082_bib32","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., & Zhu, W. J. (2002, July). Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics (pp. 311-318).","DOI":"10.3115\/1073083.1073135"},{"key":"10.1016\/j.procs.2026.03.082_bib33","unstructured":"Lin, C. Y. (2004, July). Rouge: A package for automatic evaluation of summaries. In Text summarization branches out (pp. 74-81)."},{"key":"10.1016\/j.procs.2026.03.082_bib34","unstructured":"Banerjee, S., & Lavie, A. (2004). Meteor: an automatic metric for mt evaluation with high levels of correlation with human judgments. Proceedings of ACL-WMT, 65-72."},{"key":"10.1016\/j.procs.2026.03.082_bib35","series-title":"Spice: Semantic propositional image caption evaluation. In Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part V 14 (pp. 382-398)","author":"Anderson","year":"2016"},{"key":"10.1016\/j.procs.2026.03.082_bib36","doi-asserted-by":"crossref","first-page":"101878","DOI":"10.1016\/j.artmed.2020.101878","article-title":"Deep learning in generating radiology reports: A survey","volume":"106","author":"Monshi","year":"2020","journal-title":"Artificial Intelligence in Medicine"},{"key":"10.1016\/j.procs.2026.03.082_bib37","doi-asserted-by":"crossref","first-page":"102125","DOI":"10.1016\/j.media.2021.102125","article-title":"Deep learning for chest X-ray analysis: A survey","volume":"72","author":"\u00c7all\u0131","year":"2021","journal-title":"Medical image analysis"},{"issue":"1","key":"10.1016\/j.procs.2026.03.082_bib38","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1186\/s12938-023-01113-y","article-title":"A survey on automatic generation of medical imaging reports based on deep learning","volume":"22","author":"Pang","year":"2023","journal-title":"BioMedical Engineering OnLine"},{"key":"10.1016\/j.procs.2026.03.082_bib39","series-title":"Generating radiology reports via memory-driven transformer","first-page":"1439","author":"Chen","year":"2020"},{"key":"10.1016\/j.procs.2026.03.082_bib40","series-title":"Aligntransformer: Hierarchical alignment of visual regions and disease tags for medical report generation. MICCAI, pp. 72-82","author":"You","year":"2021"},{"key":"10.1016\/j.procs.2026.03.082_bib41","doi-asserted-by":"crossref","unstructured":"A. Nicolson, J. Dowling, and B. Koopman (2022). Improving chest x-ray report generation by leveraging warm starting. arXiv:2201.09405.","DOI":"10.1016\/j.artmed.2023.102633"},{"key":"10.1016\/j.procs.2026.03.082_bib42","doi-asserted-by":"crossref","unstructured":"Tanida, T., M\u00fcller, P., Kaissis, G., & Rueckert, D. (2023). Interactive and explainable region-guided radiology report generationIn Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (pp. 7433-7442).","DOI":"10.1109\/CVPR52729.2023.00718"},{"key":"10.1016\/j.procs.2026.03.082_bib43","unstructured":"Wang, H., Ye, S., Lin, J., Naseem, U., & Kim, J. (2025). LVMed-R2: Perception and Reflection-driven Complex Reasoning for Medical Report Generation. arXiv preprint arXiv:2504.02885."}],"container-title":["Procedia Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050926006770?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050926006770?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T11:32:57Z","timestamp":1775561577000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1877050926006770"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":43,"alternative-id":["S1877050926006770"],"URL":"https:\/\/doi.org\/10.1016\/j.procs.2026.03.082","relation":{},"ISSN":["1877-0509"],"issn-type":[{"value":"1877-0509","type":"print"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Relational Memory Multimodal Models for Automated X-Ray Medical Reporting","name":"articletitle","label":"Article Title"},{"value":"Procedia Computer Science","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.procs.2026.03.082","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}]}}