{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T21:44:49Z","timestamp":1769723089792,"version":"3.49.0"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031723834","type":"print"},{"value":"9783031723841","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72384-1_29","type":"book-chapter","created":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T11:02:53Z","timestamp":1727866973000},"page":"303-313","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Multivariate Cooperative Game for\u00a0Image-Report Pairs: Hierarchical Semantic Alignment for\u00a0Medical Report Generation"],"prefix":"10.1007","author":[{"given":"Zhihong","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Xuxin","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"Yunyan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhaorun","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Qingqing","family":"Long","sequence":"additional","affiliation":[]},{"given":"Hongxiang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Zhiqi","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Xian","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Yefeng","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,3]]},"reference":[{"key":"29_CR1","doi-asserted-by":"crossref","unstructured":"Anderson, P., He, X., Buehler, C., Teney, D., Johnson, M., Gould, S., Zhang, L.: Bottom-up and top-down attention for image captioning and visual question answering. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"29_CR2","unstructured":"Banerjee, S., Lavie, A.: METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In: ACL (2005)"},{"key":"29_CR3","doi-asserted-by":"crossref","unstructured":"Beltagy, I., Lo, K., Cohan, A.: Scibert: A pretrained language model for scientific text. In: EMNLP (2019)","DOI":"10.18653\/v1\/D19-1371"},{"key":"29_CR4","doi-asserted-by":"crossref","unstructured":"Cao, Y., Cui, L., Zhang, L., Yu, F., Li, Z., Xu, Y.: Mmtn: Multi-modal memory transformer network for image-report consistent medical report generation. In: AAAI (2023)","DOI":"10.1609\/aaai.v37i1.25100"},{"key":"29_CR5","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.E.: A simple framework for contrastive learning of visual representations. In: ICML (2020)"},{"key":"29_CR6","doi-asserted-by":"crossref","unstructured":"Chen, Z., Shen, Y., Song, Y., Wan, X.: Cross-modal memory networks for radiology report generation. In: ACL\/IJCNLP (2021)","DOI":"10.18653\/v1\/2021.acl-long.459"},{"key":"29_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Z., Song, Y., Chang, T., Wan, X.: Generating radiology reports via memory-driven transformer. In: EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.112"},{"key":"29_CR8","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocv080","volume-title":"Preparing a collection of radiology examinations for distribution and retrieval","author":"D Demner-Fushman","year":"2016","unstructured":"Demner-Fushman, D., Kohli, M.D., Rosenman, M.B., Shooshan, S.E., Rodriguez, L., Antani, S.K., Thoma, G.R., McDonald, C.J.: Preparing a collection of radiology examinations for distribution and retrieval. J. Am. Medical Informatics Assoc. (2016)"},{"key":"29_CR9","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16x16 words: Transformers for image recognition at scale. In: ICLR (2021)"},{"key":"29_CR10","doi-asserted-by":"crossref","unstructured":"D\u2019Orsogna, M.R., Chuang, Y.L., Bertozzi, A.L., Chayes, L.S.: Self-propelled particles with soft-core interactions: patterns, stability, and collapse. Physical review letters (2006)","DOI":"10.1103\/PhysRevLett.96.104302"},{"key":"29_CR11","doi-asserted-by":"crossref","unstructured":"Irvin, J., Rajpurkar, P., Ko, M., Yu, Y., Ciurea-Ilcus, S., Chute, C., Marklund, H., Haghgoo, B., Ball, R.L., Shpanskaya, K.S., Seekins, J., Mong, D.A., Halabi, S.S., Sandberg, J.K., Jones, R., Larson, D.B., Langlotz, C.P., Patel, B.N., Lungren, M.P., Ng, A.Y.: Chexpert: A large chest radiograph dataset with uncertainty labels and expert comparison. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"29_CR12","doi-asserted-by":"crossref","unstructured":"Jin, P., Huang, J., Xiong, P., Tian, S., Liu, C., Ji, X., Yuan, L., Chen, J.: Video-text as game players: Hierarchical banzhaf interaction for cross-modal representation learning. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00244"},{"key":"29_CR13","doi-asserted-by":"crossref","unstructured":"Jing, B., Xie, P., Xing, E.P.: On the automatic generation of medical imaging reports. In: ACL (2018)","DOI":"10.18653\/v1\/P18-1240"},{"key":"29_CR14","unstructured":"Johnson, A.E.W., Pollard, T.J., Berkowitz, S.J., Greenbaum, N.R., Lungren, M.P., Deng, C., Mark, R.G., Horng, S.: MIMIC-CXR: A large publicly available database of labeled chest radiographs. CoRR (2019)"},{"key":"29_CR15","doi-asserted-by":"crossref","unstructured":"Li, C.Y., Liang, X., Hu, Z., Xing, E.P.: Knowledge-driven encode, retrieve, paraphrase for medical image report generation. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33016666"},{"key":"29_CR16","doi-asserted-by":"crossref","unstructured":"Li, H., Cao, M., Cheng, X., Li, Y., Zhu, Z., Zou, Y.: G2l: Semantically aligned and uniform video grounding via geodesic and game theory. In: CVPR (2023)","DOI":"10.1109\/ICCV51070.2023.01105"},{"key":"29_CR17","doi-asserted-by":"crossref","unstructured":"Li, M., Lin, B., Chen, Z., Lin, H., Liang, X., Chang, X.: Dynamic graph enhanced contrastive learning for chest x-ray report generation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00325"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"Li, Y., Yang, B., Cheng, X., Zhu, Z., Li, H., Zou, Y.: Unify, align and refine: Multi-level semantic alignment for radiology report generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.00268"},{"key":"29_CR19","unstructured":"Lin, C.Y.: ROUGE: A package for automatic evaluation of summaries. In: Text Summarization Branches Out (2004)"},{"key":"29_CR20","unstructured":"Liu, C., Tian, Y., Song, Y.: A systematic review of deep learning-based research on radiology report generation. arXiv (2023)"},{"key":"29_CR21","doi-asserted-by":"crossref","unstructured":"Liu, F., Ge, S., Wu, X.: Competence-based multimodal curriculum learning for medical report generation. In: ACL\/IJCNLP (2021)","DOI":"10.18653\/v1\/2021.acl-long.234"},{"key":"29_CR22","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y.: Exploring and distilling posterior and prior knowledge for radiology report generation. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01354"},{"key":"29_CR23","doi-asserted-by":"crossref","unstructured":"Liu, F., Yin, C., Wu, X., Ge, S., Zhang, P., Sun, X.: Contrastive attention for automatic chest x-ray report generation. In: ACL\/IJCNLP (2021)","DOI":"10.18653\/v1\/2021.findings-acl.23"},{"key":"29_CR24","unstructured":"Van\u00a0der Maaten, L., Hinton, G.: Visualizing data using t-sne. Journal of Machine Learning Research (2008)"},{"key":"29_CR25","doi-asserted-by":"crossref","unstructured":"Marichal, J., Mathonet, P.: Weighted banzhaf power and interaction indexes through weighted approximations of games. Eur. J. Oper. Res. (2011)","DOI":"10.1016\/j.ejor.2010.11.027"},{"key":"29_CR26","doi-asserted-by":"crossref","unstructured":"Nicolson, A., Dowling, J., Koopman, B.: Improving chest x-ray report generation by leveraging warm starting. Artificial intelligence in medicine (2023)","DOI":"10.1016\/j.artmed.2023.102633"},{"key":"29_CR27","doi-asserted-by":"crossref","unstructured":"Nooralahzadeh, F., Gonzalez, N.P., Frauenfelder, T., Fujimoto, K., Krauthammer, M.: Progressive transformer-based generation of radiology reports. In: EMNLP (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.241"},{"key":"29_CR28","unstructured":"van\u00a0den Oord, A., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. CoRR (2018)"},{"key":"29_CR29","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.: BLEU: a method for automatic evaluation of machine translation. In: ACL (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"29_CR30","unstructured":"Raffel, C., Shazeer, N., Roberts, A., Lee, K., Narang, S., Matena, M., Zhou, Y., Li, W., Liu, P.J.: Exploring the limits of transfer learning with a unified text-to-text transformer. The Journal of Machine Learning Research (2020)"},{"key":"29_CR31","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V.: Self-critical sequence training for image captioning. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.131"},{"key":"29_CR32","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need. In: NeurIPS (2017)"},{"key":"29_CR33","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C.L., Parikh, D.: Cider: Consensus-based image description evaluation. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"29_CR34","doi-asserted-by":"crossref","unstructured":"Wang, X., Peng, Y., Lu, L., Lu, Z., Summers, R.M.: Tienet: Text-image embedding network for common thorax disease classification and reporting in chest x-rays. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00943"},{"key":"29_CR35","doi-asserted-by":"crossref","unstructured":"Wang, Z., Liu, L., Wang, L., Zhou, L.: Metransformer: Radiology report generation by transformer with multiple learnable expert tokens. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01112"},{"key":"29_CR36","doi-asserted-by":"crossref","unstructured":"Wang, Z., Tang, M., Wang, L., Li, X., Zhou, L.: A medical semantic-assisted transformer for radiographic report generation. In: MICCAI (2022)","DOI":"10.1007\/978-3-031-16437-8_63"},{"key":"29_CR37","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wu, Z., Agarwal, D., Sun, J.: Medclip: Contrastive learning from unpaired medical images and text. In: EMNLP (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"29_CR38","doi-asserted-by":"crossref","unstructured":"Yan, A., He, Z., Lu, X., Du, J., Chang, E.Y., Gentili, A., McAuley, J.J., Hsu, C.: Weakly supervised contrastive learning for chest x-ray report generation. In: EMNLP (2021)","DOI":"10.18653\/v1\/2021.findings-emnlp.336"},{"key":"29_CR39","doi-asserted-by":"crossref","unstructured":"Yang, Y., Liu, X.: A re-examination of text categorization methods. In: SIGIR (1999)","DOI":"10.1145\/312624.312647"},{"key":"29_CR40","doi-asserted-by":"crossref","unstructured":"You, D., Liu, F., Ge, S., Xie, X., Zhang, J., Wu, X.: Aligntransformer: Hierarchical alignment of visual regions and disease tags for medical report generation. MICCAI (2022)","DOI":"10.1007\/978-3-030-87199-4_7"},{"key":"29_CR41","unstructured":"You, J., Li, D., Okumura, M., Suzuki, K.: JPG - jointly learn to align: Automated disease prediction and radiology report generation. In: COLING (2022)"},{"key":"29_CR42","unstructured":"Zhu, Z., Zhang, Y., Cheng, X., Huang, Z., Xu, D., Wu, X., Zheng, Y.: Alignment before awareness: Towards visual question localized-answering in robotic surgery via optimal transport and answer semantics. In: COLING (2024)"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72384-1_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T11:17:11Z","timestamp":1727867831000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72384-1_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031723834","9783031723841"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72384-1_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"3 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Marrakesh","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Morocco","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2024\/en\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}