{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T22:00:31Z","timestamp":1777759231275,"version":"3.51.4"},"publisher-location":"Cham","reference-count":52,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031966248","type":"print"},{"value":"9783031966255","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,8,7]],"date-time":"2025-08-07T00:00:00Z","timestamp":1754524800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,7]],"date-time":"2025-08-07T00:00:00Z","timestamp":1754524800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-96625-5_15","type":"book-chapter","created":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T07:10:10Z","timestamp":1754464210000},"page":"218-233","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Structure Observation Driven Image-Text Contrastive Learning for\u00a0Computed Tomography Report Generation"],"prefix":"10.1007","author":[{"given":"Hong","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiong","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yawen","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xian","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yefeng","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liansheng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,8,7]]},"reference":[{"key":"15_CR1","unstructured":"Banerjee, S., Lavie, A.: METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, pp. 65\u201372 (2005)"},{"issue":"1","key":"15_CR2","first-page":"3","volume":"81","author":"A Brady","year":"2012","unstructured":"Brady, A., Laoide, R.\u00d3., McCarthy, P., McDermott, R.: Discrepancy and error in radiology: concepts, causes and consequences. Ulst. Med. J. 81(1), 3 (2012)","journal-title":"Ulst. Med. J."},{"key":"15_CR3","doi-asserted-by":"crossref","unstructured":"Byun, J., Kim, D., Moon, T.: MAFA: managing false negatives for vision-language pre-training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 27314\u201327324 (2024)","DOI":"10.1109\/CVPR52733.2024.02579"},{"key":"15_CR4","first-page":"12546","volume":"33","author":"K Chaitanya","year":"2020","unstructured":"Chaitanya, K., Erdil, E., Karani, N., Konukoglu, E.: Contrastive learning of global and local features for medical image segmentation with limited annotations. Adv. Neural. Inf. Process. Syst. 33, 12546\u201312558 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"15_CR5","unstructured":"Chan, A., Ong, Y.S., Pung, B., Zhang, A., Fu, J.: CoCon: a self-supervised approach for controlled text generation. In: International Conference on Learning Representations (2021)"},{"key":"15_CR6","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607. PMLR (2020)"},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Z., Shen, Y., Song, Y., Wan, X.: Cross-modal memory networks for radiology report generation. In: Zong, C., Xia, F., Li, W., Navigli, R. (eds.) Proceedings of the Annual Meeting of the Association for Computational Linguistics, pp. 5904\u20135914. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"15_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Z., Song, Y., Chang, T.H., Wan, X.: Generating radiology reports via memory-driven transformer. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, pp. 1439\u20131449. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"15_CR9","doi-asserted-by":"crossref","unstructured":"Chen, Z., Luo, L., Bie, Y., Chen, H.: Dia-LLaMA: towards large language model-driven CT report generation. arXiv preprint arXiv:2403.16386 (2024)","DOI":"10.1007\/978-3-032-04981-0_14"},{"key":"15_CR10","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"15_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2020.101857","volume":"67","author":"RL Draelos","year":"2021","unstructured":"Draelos, R.L., et al.: Machine-learning-based multiple abnormality prediction with large-scale chest computed tomography volumes. Med. Image Anal. 67, 101857 (2021)","journal-title":"Med. Image Anal."},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"Feng, R., Zhou, Z., Gotway, M.B., Liang, J.: Parts2whole: self-supervised contrastive learning via reconstruction. In: Domain Adaptation and Representation Transfer, and Distributed and Collaborative Learning: Second MICCAI Workshop, DART 2020, and First MICCAI Workshop, DCL 2020, Held in Conjunction with MICCAI 2020, Lima, Peru, October 4\u20138, 2020, Proceedings 2, pp. 85\u201395. Springer (2020)","DOI":"10.1007\/978-3-030-60548-3_9"},{"issue":"7","key":"15_CR13","doi-asserted-by":"publisher","first-page":"3435","DOI":"10.1109\/JBHI.2022.3153902","volume":"26","author":"R Gong","year":"2022","unstructured":"Gong, R., Han, X., Wang, J., Ying, S., Shi, J.: Self-supervised bi-channel transformer networks for computer-aided diagnosis. IEEE J. Biomed. Health Inform. 26(7), 3435\u20133446 (2022)","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"15_CR14","unstructured":"Hamamci, I.E., et\u00a0al.: A foundation model utilizing chest CT volumes and radiology reports for supervised-level zero-shot detection of abnormalities. CoRR (2024)"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Hamamci, I.E., Er, S., Menze, B.: CT2Rep: automated radiology report generation for 3D medical imaging. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 476\u2013486. Springer (2024)","DOI":"10.1007\/978-3-031-72390-2_45"},{"key":"15_CR16","unstructured":"Hamamci, I.E., et\u00a0al.: GenerateCT: text-guided 3D chest CT generation. CoRR (2023)"},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"15_CR18","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations (2022)"},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"Huang, S.C., Shen, L., Lungren, M.P., Yeung, S.: GLoRIA: a multimodal global-local representation learning framework for label-efficient medical image recognition. In: IEEE International Conference on Computer Vision, pp. 3942\u20133951 (2021)","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Irvin, J., et\u00a0al.: CheXpert: a large chest radiograph dataset with uncertainty labels and expert comparison. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 590\u2013597 (2019)","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"15_CR21","unstructured":"Jain, S., et al.: Radgraph: extracting clinical entities and relations from radiology reports. In: Neural Information Processing Systems Datasets and Benchmarks Track (Round 1) (2021)"},{"key":"15_CR22","doi-asserted-by":"crossref","unstructured":"Jin, H., Che, H., Lin, Y., Chen, H.: PromptMRG: diagnosis-driven prompts for medical report generation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 2607\u20132615 (2024)","DOI":"10.1609\/aaai.v38i3.28038"},{"key":"15_CR23","doi-asserted-by":"crossref","unstructured":"Jing, B., Wang, Z., Xing, E.: Show, describe and conclude: on exploiting the structure information of chest X-ray reports. In: Korhonen, A., Traum, D., M\u00e0rquez, L. (eds.) Annual Meetings of the Association for Computational Linguistics, pp. 6570\u20136580. Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/P19-1657"},{"key":"15_CR24","doi-asserted-by":"crossref","unstructured":"Li, C.Y., Liang, X., Hu, Z., Xing, E.P.: Knowledge-driven encode, retrieve, paraphrase for medical image report generation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 6666\u20136673 (2019)","DOI":"10.1609\/aaai.v33i01.33016666"},{"key":"15_CR25","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: International Conference on Machine Learning, pp. 12888\u201312900. PMLR (2022)"},{"key":"15_CR26","first-page":"9694","volume":"34","author":"J Li","year":"2021","unstructured":"Li, J., Selvaraju, R., Gotmare, A., Joty, S., Xiong, C., Hoi, S.: Align before fuse: vision and language representation learning with momentum distillation. Adv. Neural. Inf. Process. Syst. 34, 9694\u20139705 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"1","key":"15_CR27","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/s11280-022-01013-6","volume":"26","author":"M Li","year":"2023","unstructured":"Li, M., Liu, R., Wang, F., Chang, X., Liang, X.: Auxiliary signal-guided knowledge encoder-decoder for medical report generation. World Wide Web 26(1), 253\u2013270 (2023)","journal-title":"World Wide Web"},{"key":"15_CR28","unstructured":"Li, Y., Liang, X., Hu, Z., Xing, E.P.: Hybrid retrieval-generation reinforced agent for medical image report generation. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"15_CR29","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381 (2004)"},{"issue":"12","key":"15_CR30","doi-asserted-by":"publisher","first-page":"3579","DOI":"10.1109\/TMI.2023.3294980","volume":"42","author":"B Liu","year":"2023","unstructured":"Liu, B., et al.: Improving medical vision-language contrastive pretraining with semantics-aware triage. IEEE Trans. Med. Imaging 42(12), 3579\u20133589 (2023)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"15_CR31","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y.: Exploring and distilling posterior and prior knowledge for radiology report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13753\u201313762 (2021)","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"15_CR32","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2019)"},{"key":"15_CR33","doi-asserted-by":"crossref","unstructured":"Lu, J., Xiong, C., Parikh, D., Socher, R.: Knowing when to look: adaptive attention via a visual sentinel for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 375\u2013383 (2017)","DOI":"10.1109\/CVPR.2017.345"},{"key":"15_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2023.102633","volume":"144","author":"A Nicolson","year":"2023","unstructured":"Nicolson, A., Dowling, J., Koopman, B.: Improving chest X-ray report generation by leveraging warm starting. Artif. Intell. Med. 144, 102633 (2023)","journal-title":"Artif. Intell. Med."},{"issue":"3","key":"15_CR35","doi-asserted-by":"publisher","first-page":"1448","DOI":"10.1109\/JBHI.2023.3345932","volume":"28","author":"R Pan","year":"2024","unstructured":"Pan, R., Ran, R., Hu, W., Zhang, W., Qin, Q., Cui, S.: S3-Net: a self-supervised dual-stream network for radiology report generation. IEEE J. Biomed. Health Inform. 28(3), 1448\u20131459 (2024)","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"15_CR36","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: BLEU: a method for automatic evaluation of machine translation. In: Annual Meetings of the Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"15_CR37","unstructured":"Paszke, A., et\u00a0al.: PyTorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"issue":"6","key":"15_CR38","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"15_CR39","doi-asserted-by":"crossref","unstructured":"Smit, A., Jain, S., Rajpurkar, P., Pareek, A., Ng, A., Lungren, M.: CheXbert: combining automatic labelers and expert annotations for accurate radiology report labeling using BERT. In: Webber, B., Cohn, T., He, Y., Liu, Y. (eds.) Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, pp. 1500\u20131519. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.117"},{"key":"15_CR40","doi-asserted-by":"crossref","unstructured":"Taleb, A., Kirchler, M., Monti, R., Lippert, C.: ContIG: self-supervised multimodal contrastive learning for medical imaging with genetics. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20908\u201320921 (2022)","DOI":"10.1109\/CVPR52688.2022.02024"},{"key":"15_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.121442","volume":"237","author":"Y Tang","year":"2024","unstructured":"Tang, Y., Yang, H., Zhang, L., Yuan, Y.: Work like a doctor: unifying scan localizer and dynamic generator for automated computed tomography report generation. Expert Syst. Appl. 237, 121442 (2024)","journal-title":"Expert Syst. Appl."},{"key":"15_CR42","doi-asserted-by":"crossref","unstructured":"Tanida, T., M\u00fcller, P., Kaissis, G., Rueckert, D.: Interactive and explainable region-guided radiology report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7433\u20137442 (2023)","DOI":"10.1109\/CVPR52729.2023.00718"},{"key":"15_CR43","unstructured":"Touvron, H., et\u00a0al.: Llama 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"key":"15_CR44","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"15_CR45","first-page":"33536","volume":"35","author":"F Wang","year":"2022","unstructured":"Wang, F., Zhou, Y., Wang, S., Vardhanabhuti, V., Yu, L.: Multi-granularity cross-modal alignment for generalized medical visual representation learning. Adv. Neural. Inf. Process. Syst. 35, 33536\u201333549 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"3","key":"15_CR46","doi-asserted-by":"publisher","DOI":"10.1016\/j.metrad.2023.100033","volume":"1","author":"Z Wang","year":"2023","unstructured":"Wang, Z., Liu, L., Wang, L., Zhou, L.: R2GenGPT: radiology report generation with frozen LLMs. Meta-Radiology 1(3), 100033 (2023)","journal-title":"Meta-Radiology"},{"key":"15_CR47","doi-asserted-by":"crossref","unstructured":"Wang, Z., Tang, M., Wang, L., Li, X., Zhou, L.: A medical semantic-assisted transformer for radiographic report generation. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 655\u2013664. Springer (2022)","DOI":"10.1007\/978-3-031-16437-8_63"},{"key":"15_CR48","doi-asserted-by":"crossref","unstructured":"Wang, Z., Zhou, L., Wang, L., Li, X.: A self-boosting framework for automated radiographic report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2433\u20132442 (2021)","DOI":"10.1109\/CVPR46437.2021.00246"},{"key":"15_CR49","unstructured":"Wu, C., Zhang, X., Zhang, Y., Wang, Y., Xie, W.: Towards generalist foundation model for radiology. arXiv preprint arXiv:2308.02463 (2023)"},{"key":"15_CR50","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102798","volume":"86","author":"S Yang","year":"2023","unstructured":"Yang, S., Wu, X., Ge, S., Zheng, Z., Zhou, S.K., Xiao, L.: Radiology report generation with a learned knowledge base and multi-modal alignment. Med. Image Anal. 86, 102798 (2023)","journal-title":"Med. Image Anal."},{"key":"15_CR51","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2022.102510","volume":"80","author":"S Yang","year":"2022","unstructured":"Yang, S., Wu, X., Ge, S., Zhou, S.K., Xiao, L.: Knowledge matters: chest radiology report generation with general and specific knowledge. Med. Image Anal. 80, 102510 (2022)","journal-title":"Med. Image Anal."},{"key":"15_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wu, C., Zhao, Z., Lei, J., Zhang, Y., Wang, Y., Xie, W.: RadGenome-Chest CT: a grounded vision-language dataset for chest CT analysis. arXiv preprint arXiv:2404.16754 (2024)","DOI":"10.1038\/s41597-025-05922-9"}],"container-title":["Lecture Notes in Computer Science","Information Processing in Medical Imaging"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-96625-5_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T13:26:26Z","timestamp":1777469186000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-96625-5_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,7]]},"ISBN":["9783031966248","9783031966255"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-96625-5_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,7]]},"assertion":[{"value":"7 August 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IPMI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Information Processing in Medical Imaging","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kos","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 May 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 May 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ipmi2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ipmi2025.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}