{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T20:37:31Z","timestamp":1770496651204,"version":"3.49.0"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T00:00:00Z","timestamp":1736294400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T00:00:00Z","timestamp":1736294400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62262005"],"award-info":[{"award-number":["62262005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Innovation Team of University in Guizhou Province","award":["2022033"],"award-info":[{"award-number":["2022033"]}]},{"name":"High-level Innovative Talents in Guizhou Province","award":["GCC[2023]033"],"award-info":[{"award-number":["GCC[2023]033"]}]},{"name":"Natural Science Research Project of Guizhou Provincial Department of Education","award":["QJJ[2023]011"],"award-info":[{"award-number":["QJJ[2023]011"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s00530-024-01649-6","type":"journal-article","created":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T16:52:55Z","timestamp":1736355175000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Cross-modal retrieval of chest X-ray images and diagnostic reports based on report entity graph and dual attention"],"prefix":"10.1007","volume":"31","author":[{"given":"Weihua","family":"Ou","sequence":"first","affiliation":[]},{"given":"Yingjie","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Linqing","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Jianping","family":"Gou","sequence":"additional","affiliation":[]},{"given":"Jiahao","family":"Xiong","sequence":"additional","affiliation":[]},{"given":"Jiacheng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Lingge","family":"Lai","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,8]]},"reference":[{"key":"1649_CR1","unstructured":"Andrew, G., Arora, R., Bilmes, J., Livescu, K.: Deep canonical correlation analysis. In: International conference on machine learning. pp. 1247\u20131255. PMLR (2013)"},{"key":"1649_CR2","doi-asserted-by":"crossref","unstructured":"Biswal, S., Xiao, C., Glass, L.M., Westover, B., Sun, J.: Clara: Clinical report auto-completion. In: Proceedings of The Web Conference 2020. pp. 541\u2013550 (2020)","DOI":"10.1145\/3366423.3380137"},{"key":"1649_CR3","unstructured":"Chen, R., Wang, H., Wang, L., Kwong, S.: Two-stream hierarchical similarity reasoning for image-text matching (2022). arXiv preprint arXiv:2203.05349"},{"key":"1649_CR4","doi-asserted-by":"crossref","unstructured":"Cheng, P., Lin, L., Lyu, J., Huang, Y., Luo, W., Tang, X.: Prior: prototype representation joint learning from medical images and reports. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 21361\u201321371 (2023)","DOI":"10.1109\/ICCV51070.2023.01953"},{"key":"1649_CR5","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: A large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition. pp. 248\u2013255. Ieee (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1649_CR6","doi-asserted-by":"crossref","unstructured":"Diao, H., Zhang, Y., Ma, L., Lu, H.: Similarity reasoning and filtration for image-text matching. In: Proceedings of the AAAI Conference on Artificial Intelligence. pp. 1218\u20131226 (2021)","DOI":"10.1609\/aaai.v35i2.16209"},{"key":"1649_CR7","unstructured":"Dina, D.F., Kohli, M., Rosenman, M.B., Shooshan, S.E., Laritza, R., Sameer, A., Thoma, G.R., Mcdonald, C.J.: Preparing a collection of radiology examinations for distribution and retrieval. J. Am. Med. Inform. Assoc. Jamia, 2 (2015)"},{"key":"1649_CR8","doi-asserted-by":"crossref","unstructured":"Dong, Z.: Research on medical image registration based on graphic neural network reinforcement learning. In: Journal of Physics: Conference Series. vol.\u00a01693, p. 012131. IOP Publishing (2020)","DOI":"10.1088\/1742-6596\/1693\/1\/012131"},{"key":"1649_CR9","unstructured":"Endo, M., Krishnan, R., Krishna, V., Ng, A.Y., Rajpurkar, P.: Retrieval-based chest x-ray report generation using a pre-trained contrastive language-image model. In: Proceedings of Machine Learning for Health. Proceedings of Machine Learning Research, vol.\u00a0158, pp. 209\u2013219. PMLR (2021)"},{"key":"1649_CR10","unstructured":"Faghri, F., Fleet, D.J., Kiros, J.R., Fidler, S.: Vse++: Improving visual-semantic embeddings with hard negatives. In: Proceedings of the British Machine Vision Conference (BMVC) (2018)"},{"issue":"2","key":"1649_CR11","doi-asserted-by":"publisher","first-page":"546","DOI":"10.1109\/TMI.2022.3224660","volume":"42","author":"N Gaggion","year":"2022","unstructured":"Gaggion, N., Mansilla, L., Mosquera, C., Milone, D.H., Ferrante, E.: Improving anatomical plausibility in medical image segmentation via hybrid graph neural networks: applications to chest x-ray analysis. IEEE Trans. Med. Imaging 42(2), 546\u2013556 (2022)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1649_CR12","doi-asserted-by":"crossref","unstructured":"Goyal, Y., Khot, T., Summers-Stay, D., Batra, D., Parikh, D.: Making the v in vqa matter: elevating the role of image understanding in visual question answering. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.670"},{"issue":"9","key":"1649_CR13","doi-asserted-by":"publisher","first-page":"2246","DOI":"10.1109\/TMI.2021.3073986","volume":"40","author":"L Hansen","year":"2021","unstructured":"Hansen, L., Heinrich, M.P.: Graphregnet: deep graph regularisation networks on sparse keypoints for dense registration of 3d lung cts. IEEE Trans. Med. Imaging 40(9), 2246\u20132257 (2021)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1649_CR14","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1649_CR15","doi-asserted-by":"crossref","unstructured":"Huang, S.C., Shen, L., Lungren, M.P., Yeung, S.: Gloria: A multimodal global-local representation learning framework for label-efficient medical image recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3942\u20133951 (2021)","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"1649_CR16","doi-asserted-by":"crossref","unstructured":"Irvin, J., Rajpurkar, P., Ko, M., Yu, Y., Ciurea-Ilcus, S., Chute, C., Marklund, H., Haghgoo, B., Ball, R., Shpanskaya, K., et\u00a0al.: Chexpert: a large chest radiograph dataset with uncertainty labels and expert comparison. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 590\u2013597 (2019)","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"1649_CR17","unstructured":"Jain, S., Agrawal, A., Saporta, A., Truong, S.Q., Duong, D.N., Bui, T., Chambon, P., Zhang, Y., Lungren, M.P., Ng, A.Y.a.: Radgraph: Extracting clinical entities and relations from radiology reports. In: Neural Information Processing Systems (2021)"},{"key":"1649_CR18","doi-asserted-by":"crossref","unstructured":"Jiang, J., Chen, X., Tian, G., Liu, Y.: Vig-unet: vision graph neural networks for medical image segmentation. In: 2023 IEEE 20th International Symposium on Biomedical Imaging (ISBI). pp.\u00a01\u20135. IEEE (2023)","DOI":"10.1109\/ISBI53787.2023.10230496"},{"key":"1649_CR19","unstructured":"Johnson, A.E., Pollard, T.J., Greenbaum, N.R., Lungren, M.P., Deng, C.y., Peng, Y., Lu, Z., Mark, R.G., Berkowitz, S.J., Horng, S.: Mimic-cxr-jpg, a large publicly available database of labeled chest radiographs (2019). arXiv preprint arXiv:1901.07042"},{"issue":"2","key":"1649_CR20","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1136\/jnnp-2016-314005","volume":"88","author":"M Jury\u0144czyk","year":"2017","unstructured":"Jury\u0144czyk, M., Tackley, G., Kong, Y., Geraldes, R., Matthews, L., Woodhall, M., Waters, P., Kuker, W., Craner, M., Weir, A., et al.: Brain lesion distribution criteria distinguish ms from aqp4-antibody nmosd and mog-antibody disease. J. Neurol. Neurosurg. Psychiatry 88(2), 132\u2013136 (2017)","journal-title":"J. Neurol. Neurosurg. Psychiatry"},{"key":"1649_CR21","unstructured":"Kenton, J.D.M.W.C., Toutanova, L.K.: Bert: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NaacL-HLT. vol.\u00a01, p.\u00a02 (2019)"},{"key":"1649_CR22","unstructured":"Kipf, T.N., Welling, M.: Semi-Supervised Classification with Graph Convolutional Networks. In: Proceedings of the 5th International Conference on Learning Representations. ICLR \u201917 (2017)"},{"key":"1649_CR23","doi-asserted-by":"crossref","unstructured":"Lee, K.H., Chen, X., Hua, G., Hu, H., He, X.: Stacked cross attention for image-text matching. In: Proceedings of the European Conference on Computer Vision (ECCV). pp. 201\u2013216 (2018)","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"1649_CR24","unstructured":"Li, F., Zhu, L., Wang, T., Li, J., Zhang, Z., Shen, H.T.: Cross-modal retrieval: a systematic review of methods and future directions (2023)"},{"key":"1649_CR25","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In: Krause, A., Brunskill, E., Cho, K., Engelhardt, B., Sabato, S., Scarlett, J. (eds.) Proceedings of the 40th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0202, pp. 19730\u201319742. PMLR (23\u201329)"},{"key":"1649_CR26","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In: International Conference on Machine Learning. pp. 12888\u201312900. PMLR (2022)"},{"key":"1649_CR27","first-page":"9694","volume":"34","author":"J Li","year":"2021","unstructured":"Li, J., Selvaraju, R., Gotmare, A., Joty, S., Xiong, C., Hoi, S.C.H.: Align before fuse: vision and language representation learning with momentum distillation. Adv. Neural Inform. Process. Syst. 34, 9694\u20139705 (2021)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"1649_CR28","doi-asserted-by":"crossref","unstructured":"Li, M., Lin, B., Chen, Z., Lin, H., Liang, X., Chang, X.: Dynamic graph enhanced contrastive learning for chest x-ray report generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 3334\u20133343 (2023)","DOI":"10.1109\/CVPR52729.2023.00325"},{"key":"1649_CR29","unstructured":"Li, Y., Liang, X., Hu, Z., Xing, E.P.: Hybrid retrieval-generation reinforced agent for medical image report generation. Adv. Neural Inform. Process. Syst. 31 (2018)"},{"issue":"12","key":"1649_CR30","doi-asserted-by":"publisher","first-page":"6999","DOI":"10.1109\/TNNLS.2021.3084827","volume":"33","author":"Z Li","year":"2021","unstructured":"Li, Z., Liu, F., Yang, W., Peng, S., Zhou, J.: A survey of convolutional neural networks: analysis, applications, and prospects. IEEE Trans. Neural Netw. Learn. Syst. 33(12), 6999\u20137019 (2021)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"1649_CR31","doi-asserted-by":"crossref","unstructured":"Liu, C., Mao, Z., Liu, A.A., Zhang, T., Wang, B., Zhang, Y.: Focus your attention: a bidirectional focal attention network for image-text matching. In: Proceedings of the 27th ACM International Conference on Multimedia. pp. 3\u201311 (2019)","DOI":"10.1145\/3343031.3350869"},{"issue":"1","key":"1649_CR32","doi-asserted-by":"publisher","first-page":"166","DOI":"10.1109\/TFUZZ.2020.2984991","volume":"29","author":"H Lu","year":"2020","unstructured":"Lu, H., Zhang, M., Xu, X., Li, Y., Shen, H.T.: Deep fuzzy hashing network for efficient image retrieval. IEEE Trans. Fuzzy Syst. 29(1), 166\u2013176 (2020)","journal-title":"IEEE Trans. Fuzzy Syst."},{"issue":"2","key":"1649_CR33","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1016\/S0016-0032(96)00063-4","volume":"334","author":"ML Men\u00e9ndez","year":"1997","unstructured":"Men\u00e9ndez, M.L., Pardo, J., Pardo, L., Pardo, M.: The Jensen\u2013Shannon divergence. J. Franklin Inst. 334(2), 307\u2013318 (1997)","journal-title":"J. Franklin Inst."},{"key":"1649_CR34","doi-asserted-by":"crossref","unstructured":"M\u00fcller, P., Kaissis, G., Zou, C., Rueckert, D.: Joint learning of localized representations from medical images and reports. In: European Conference on Computer Vision. pp. 685\u2013701. Springer (2022)","DOI":"10.1007\/978-3-031-19809-0_39"},{"key":"1649_CR35","doi-asserted-by":"crossref","unstructured":"Nam, H., Ha, J.W., Kim, J.: Dual attention networks for multimodal reasoning and matching. In: Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.232"},{"key":"1649_CR36","unstructured":"Shi, Y., Chen, S., You, X., Peng, Q., Ou, W., Zhao, Y.: Deep supervised information bottleneck hashing for cross-modal retrieval based computer-aided diagnosis (2022). arXiv preprint arXiv:2205.08365"},{"key":"1649_CR37","doi-asserted-by":"crossref","unstructured":"Vandaele, R., Mukherjee, P., Selby, H.M., Shah, R.P., Gevaert, O.: Topological data analysis of thoracic radiographic images shows improved radiomics-based lung tumor histology prediction. Patterns 4(1) (2023)","DOI":"10.1016\/j.patter.2022.100657"},{"key":"1649_CR38","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inform. Process. Syst. 30 (2017)"},{"key":"1649_CR39","first-page":"33536","volume":"35","author":"F Wang","year":"2022","unstructured":"Wang, F., Zhou, Y., Wang, S., Vardhanabhuti, V., Yu, L.: Multi-granularity cross-modal alignment for generalized medical visual representation learning. Adv. Neural Inform. Process. Syst. 35, 33536\u201333549 (2022)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"1649_CR40","unstructured":"Wang, P., Yang, A., Men, R., Lin, J., Bai, S., Li, Z., Ma, J., Zhou, C., Zhou, J., Yang, H.: Ofa: Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework. In: International Conference on Machine Learning. pp. 23318\u201323340. PMLR (2022)"},{"key":"1649_CR41","doi-asserted-by":"crossref","unstructured":"Wang, T., Huang, J., Zhang, H., Sun, Q.: Visual commonsense r-cnn. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 10760\u201310770 (2020)","DOI":"10.1109\/CVPR42600.2020.01077"},{"key":"1649_CR42","doi-asserted-by":"crossref","unstructured":"Wang, Z., Wu, Z., Agarwal, D., Sun, J.: MedCLIP: Contrastive learning from unpaired medical images and text. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing. pp. 3876\u20133887 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"1649_CR43","doi-asserted-by":"crossref","unstructured":"Wu, C., Zhang, X., Zhang, Y., Wang, Y., Xie, W.: Medklip: Medical knowledge enhanced language-image pre-training for x-ray diagnosis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 21372\u201321383 (2023)","DOI":"10.1109\/ICCV51070.2023.01954"},{"key":"1649_CR44","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2022.102510","volume":"80","author":"S Yang","year":"2022","unstructured":"Yang, S., Wu, X., Ge, S., Zhou, S.K., Xiao, L.: Knowledge matters: chest radiology report generation with general and specific knowledge. Med. Image Anal. 80, 102510 (2022)","journal-title":"Med. Image Anal."},{"issue":"2","key":"1649_CR45","doi-asserted-by":"publisher","first-page":"444","DOI":"10.1109\/TMI.2022.3219260","volume":"42","author":"H Zhang","year":"2022","unstructured":"Zhang, H., Song, R., Wang, L., Zhang, L., Wang, D., Wang, C., Zhang, W.: Classification of brain disorders in rs-fmri via local-to-global graph neural networks. IEEE Trans. Med. Imaging 42(2), 444\u2013455 (2022)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"1649_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Wang, X., Xu, Z., Yu, Q., Yuille, A., Xu, D.: When radiology report generation meets knowledge graph. In: Proceedings of the AAAI Conference on Artificial Intelligence. pp. 12910\u201312917 (2020)","DOI":"10.1609\/aaai.v34i07.6989"},{"issue":"4","key":"1649_CR47","doi-asserted-by":"publisher","first-page":"1519","DOI":"10.1007\/s11280-021-00881-8","volume":"25","author":"Y Zhang","year":"2022","unstructured":"Zhang, Y., Ou, W., Shi, Y., Deng, J., You, X., Wang, A.: Deep medical cross-modal attention hashing. World Wide Web 25(4), 1519\u20131536 (2022)","journal-title":"World Wide Web"},{"key":"1649_CR48","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2021.107673","volume":"98","author":"Y Zhang","year":"2022","unstructured":"Zhang, Y., Ou, W., Zhang, J., Deng, J.: Category supervised cross-modal hashing retrieval for chest X-ray and radiology reports. Comput. Electr. Eng. 98, 107673 (2022)","journal-title":"Comput. Electr. Eng."},{"key":"1649_CR49","unstructured":"Zhang, Y., Jiang, H., Miura, Y., Manning, C.D., Langlotz, C.P.: Contrastive learning of medical visual representations from paired images and text. In: Machine Learning for Healthcare Conference. pp. 2\u201325. PMLR (2022)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01649-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01649-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01649-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T11:03:58Z","timestamp":1740740638000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01649-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,8]]},"references-count":49,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["1649"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01649-6","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,8]]},"assertion":[{"value":"5 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no Conflict of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"58"}}