{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:51:27Z","timestamp":1757620287691,"version":"3.44.0"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031995675"},{"type":"electronic","value":"9783031995682"}],"license":[{"start":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T00:00:00Z","timestamp":1753747200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T00:00:00Z","timestamp":1753747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-99568-2_16","type":"book-chapter","created":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T15:16:35Z","timestamp":1753715795000},"page":"196-207","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing Medical Image Captioning with\u00a0Conditional Prompt Encoding"],"prefix":"10.1007","author":[{"given":"Rendson F.","family":"Fernandes","sequence":"first","affiliation":[]},{"given":"Hugo S.","family":"Oliveira","sequence":"additional","affiliation":[]},{"given":"Pedro P.","family":"Ribeiro","sequence":"additional","affiliation":[]},{"given":"H\u00e9lder P.","family":"Oliveira","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,29]]},"reference":[{"issue":"8","key":"16_CR1","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. CoRR, vol.\u00a0abs\/1411.4555 (2014)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"16_CR3","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol.\u00a030, Curran Associates, Inc. (2017)"},{"key":"16_CR4","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. CoRR, vol.\u00a0abs\/2103.00020 (2021)"},{"key":"16_CR5","unstructured":"Zhang, J., Zhang, X., Yu, S., et\u00a0al.: CXR-clip: toward large scale chest x-ray language-image pre-training. arXiv preprint arXiv:2302.04537 (2023)"},{"key":"16_CR6","unstructured":"Johnson, A.E., et al.: Mimic-CXR: a large publicly available database of labeled chest radiographs. In: Proceedings of the 9th International Conference on Artificial Intelligence in Medicine, pp.\u00a0559\u2013567. Springer, Cham (2019)"},{"key":"16_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1007\/978-3-030-01364-6_20","volume-title":"Intravascular Imaging and Computer Assisted Stenting and Large-Scale Annotation of Biomedical Data and Expert Label Synthesis","author":"O Pelka","year":"2018","unstructured":"Pelka, O., Koitka, S., R\u00fcckert, J., Nensa, F., Friedrich, C.M.: Radiology Objects in COntext (ROCO): a multimodal image dataset. In: Stoyanov, D., et al. (eds.) LABELS\/CVII\/STENT -2018. LNCS, vol. 11043, pp. 180\u2013189. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01364-6_20"},{"key":"16_CR8","doi-asserted-by":"crossref","unstructured":"Rotstein, N., Bensa\u00efd, D., Brody, S., Ganz, R., Kimmel, R.: Fusecap: leveraging large language models for enriched fused image captions. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp.\u00a05689\u20135700 (2024)","DOI":"10.1109\/WACV57701.2024.00559"},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"Fang, X., Lin, Y., Zhang, D., Cheng, K.-T., Chen, H.: Aligning medical images with general knowledge from large language models. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp.\u00a057\u201367. Springer, Cham (2024)","DOI":"10.1007\/978-3-031-72117-5_6"},{"key":"16_CR10","unstructured":"Gu, D., Gao, Y., Zhou, Y., Zhou, M., Metaxas, D.: Radalign: advancing radiology report generation with vision-language concept alignment. arXiv preprint arXiv:2501.07525 (2025)"},{"key":"16_CR11","doi-asserted-by":"publisher","DOI":"10.2196\/59505","volume":"26","author":"R AlSaad","year":"2024","unstructured":"AlSaad, R., et al.: Multimodal large language models in health care: applications, challenges, and future outlook. J. Med. Internet Res. 26, e59505 (2024)","journal-title":"J. Med. Internet Res."},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Oakden-Rayner, L., Dunnmon, J., Carneiro, G., R\u00e9, C.: Hidden stratification causes clinically meaningful failures in machine learning for medical imaging. In: Proceedings of the ACM Conference on Health, Inference, and Learning, pp.\u00a0151\u2013159 (2020)","DOI":"10.1145\/3368555.3384468"},{"key":"16_CR13","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp.\u00a05998\u20136008 (2017)"},{"key":"16_CR14","unstructured":"Zhang, J., Liu, X., Xu, X.: Contrastive learning for medical image captioning. arXiv preprint arXiv:2009.12357 (2020)"},{"key":"16_CR15","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp.\u00a08748\u20138763. PMLR (2021)"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, J., Loy, C.C., Liu, Z.: Conditional prompt learning for vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.\u00a016816\u201316825 (2022)","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"16_CR18","unstructured":"Chiang, W.-L., et\u00a0al.: Vicuna: an open-source chatbot impressing GPT-4 with 90%* chatgpt quality, vol.\u00a03, no.\u00a05 (2023). https:\/\/lmsys.org\/blog\/2023-03-30-vicuna"},{"key":"16_CR19","unstructured":"Wang, Z., Shi, Y., Liu, Y., Chen, W., Cai, P.: Promptmrg: diagnosis-driven prompting for medical report generation. arXiv preprint arXiv:2308.12604 (2023)"},{"key":"16_CR20","doi-asserted-by":"crossref","unstructured":"Jin, H., Che, H., Lin, Y., Chen, H.: Promptmrg: diagnosis-driven prompts for medical report generation (2024)","DOI":"10.1609\/aaai.v38i3.28038"},{"key":"16_CR21","unstructured":"Liu, P., Yuan, W., Fu, J., Jiang, Z., Hayashi, H., Neubig, G.: Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. CoRR, vol.\u00a0abs\/2107.13586 (2021)"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: GPT understands, too. CoRR (2023)","DOI":"10.1016\/j.aiopen.2023.08.012"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, R., Tao, C., Song, M.: Rstnet: captioning with adaptive attention on visual and non-visual words. IEEE Trans. Pattern Anal. Mach. Intell. (2021)","DOI":"10.1109\/CVPR46437.2021.01521"},{"key":"16_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"260","DOI":"10.1007\/978-3-030-28577-7_22","volume-title":"Experimental IR Meets Multilinguality, Multimodality, and Interaction","author":"X Wang","year":"2019","unstructured":"Wang, X., Guo, Z., Zhang, Yu., Li, J.: Medical image labelling and semantic understanding for clinical applications. In: Crestani, F., et al. (eds.) CLEF 2019. LNCS, vol. 11696, pp. 260\u2013270. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-28577-7_22"},{"key":"16_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.107856","volume":"114","author":"H Ayesha","year":"2021","unstructured":"Ayesha, H., et al.: Automatic medical image interpretation: state of the art and future directions. Pattern Recogn. 114, 107856 (2021)","journal-title":"Pattern Recogn."},{"key":"16_CR26","doi-asserted-by":"crossref","unstructured":"Wang, X., Peng, Y., Lu, L., Lu, Z., Summers, R.M.: Tienet: text-image embedding network for common thorax disease classification and reporting in chest x-rays (2018)","DOI":"10.1109\/CVPR.2018.00943"},{"key":"16_CR27","unstructured":"Charalampakos, F., Karatzas, V., Kougia, V., Pavlopoulos, J., Androutsopoulos, I.: Aueb NLP group at imageclefmed caption tasks 2021. In: CLEF (Working Notes), pp.\u00a01184\u20131200 (2021)"},{"key":"16_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1007\/978-3-030-59713-9_54","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2020","author":"T Syeda-Mahmood","year":"2020","unstructured":"Syeda-Mahmood, T., et al.: Chest x-ray report generation through fine-grained label learning. In: Martel, A.L., et al. (eds.) MICCAI 2020. LNCS, vol. 12262, pp. 561\u2013571. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59713-9_54"},{"issue":"7","key":"16_CR29","doi-asserted-by":"publisher","first-page":"1691","DOI":"10.1007\/s10115-022-01684-7","volume":"64","author":"J Pavlopoulos","year":"2022","unstructured":"Pavlopoulos, J., Kougia, V., Androutsopoulos, I., Papamichail, D.: Diagnostic captioning: a survey. Knowl. Inf. Syst. 64(7), 1691\u20131722 (2022)","journal-title":"Knowl. Inf. Syst."},{"key":"16_CR30","unstructured":"Alfarghaly, A., Elhosseiny, M., Maher, M., Tanveer, M., Wang, C.: Conditioned transformer language models for clinical text generation. In: Proceedings of the 20th Workshop on Biomedical Language Processing, pp.\u00a0100\u2013109 (2021)"},{"key":"16_CR31","doi-asserted-by":"crossref","unstructured":"Feng, Y., Li, L., Xiang, Y., Qin, X.: PromptCL: improving event representation via prompt template and contrastive learning. arXiv preprint arXiv:2404.17877 (2023)","DOI":"10.1007\/978-3-031-44693-1_21"},{"key":"16_CR32","doi-asserted-by":"publisher","first-page":"19522","DOI":"10.1007\/s10489-023-04453-3","volume":"53","author":"O Jia","year":"2023","unstructured":"Jia, O., Huang, H., Ren, J., Xie, L., Xiao, Y.: Contrastive learning with text augmentation for text classification. Appl. Intell. 53, 19522\u201319531 (2023)","journal-title":"Appl. Intell."},{"key":"16_CR33","doi-asserted-by":"crossref","unstructured":"Yuan, H., Yuan, Z., Gan, R., Zhang, J., Xie, Y., Yu, S.: Biobart: pretraining and evaluation of a biomedical generative language model. arXiv preprint arXiv:2204.03905 (2022)","DOI":"10.18653\/v1\/2022.bionlp-1.9"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Image Analysis"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-99568-2_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T05:53:24Z","timestamp":1757310804000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-99568-2_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,29]]},"ISBN":["9783031995675","9783031995682"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-99568-2_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,7,29]]},"assertion":[{"value":"29 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IbPRIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Iberian Conference on Pattern Recognition and Image Analysis","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Coimbra","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ibpria2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ibpria.org\/2025\/?page=home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}