{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T20:49:13Z","timestamp":1758055753370,"version":"3.44.0"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,5,6]],"date-time":"2025-05-06T00:00:00Z","timestamp":1746489600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,6]],"date-time":"2025-05-06T00:00:00Z","timestamp":1746489600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61802269"],"award-info":[{"award-number":["61802269"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s00530-025-01760-2","type":"journal-article","created":{"date-parts":[[2025,5,6]],"date-time":"2025-05-06T11:38:04Z","timestamp":1746531484000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Text semantic structure-guided correlation learning for cross-modal retrieval"],"prefix":"10.1007","volume":"31","author":[{"given":"Jie","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Jingjing","family":"Fan","sequence":"additional","affiliation":[]},{"given":"Jianguang","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Shufang","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Jianan","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,6]]},"reference":[{"key":"1760_CR1","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1109\/TBDATA.2023.3338951","volume":"10","author":"Y Bai","year":"2023","unstructured":"Bai, Y., Shu, Z., Yu, J. et al.: Proxy-based graph convolutional hashing for cross-modal retrieval. IEEE Trans. Big Data. 10(4), 371\u2013385 (2023)","journal-title":"IEEE Trans. Big Data"},{"key":"1760_CR2","doi-asserted-by":"crossref","unstructured":"Bogolin, S.V., Croitoru, I., Jin, H., et\u00a0al.: Cross modal retrieval with querybank normalisation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5194\u20135205 (2022)","DOI":"10.1109\/CVPR52688.2022.00513"},{"key":"1760_CR3","doi-asserted-by":"crossref","unstructured":"Bronstein, M.M., Bronstein, A.M., Michel, F., et\u00a0al.: Data fusion through cross-modality metric learning using similarity-sensitive hashing. In: 2010 IEEE computer society conference on computer vision and pattern recognition, pp 3594\u20133601 (2010)","DOI":"10.1109\/CVPR.2010.5539928"},{"issue":"8","key":"1760_CR4","doi-asserted-by":"publisher","first-page":"4887","DOI":"10.1109\/TSMC.2021.3103842","volume":"52","author":"B Chen","year":"2021","unstructured":"Chen, B., Zhang, Z., Lu, Y., et al.: Semantic-interactive graph convolutional network for multilabel image recognition. IEEE Trans. Syst. Man Cybern.: Syst. 52(8), 4887\u20134899 (2021)","journal-title":"IEEE Trans. Syst. Man Cybern.: Syst."},{"key":"1760_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Z.M., Wei, X.S., Wang, P., et\u00a0al.: Multi-label image recognition with graph convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 5172\u20135181 (2019)","DOI":"10.1109\/CVPR.2019.00532"},{"key":"1760_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, M., Sun, Y., Wang, L., et\u00a0al.: Vista: vision and scene text aggregation for cross-modal retrieval. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 5184\u20135193 (2022)","DOI":"10.1109\/CVPR52688.2022.00512"},{"key":"1760_CR7","doi-asserted-by":"crossref","unstructured":"Chua, T.S., Tang, J., Hong, R., et\u00a0al.: Nus-wide a real-world web image database from national university of singapore. In: Proceedings of the ACM International Conference on Image and Video Retrieval, pp 48\u201356 (2009)","DOI":"10.1145\/1646396.1646452"},{"issue":"3","key":"1760_CR8","doi-asserted-by":"publisher","first-page":"1634","DOI":"10.1109\/TCSVT.2021.3075242","volume":"32","author":"X Dong","year":"2022","unstructured":"Dong, X., Liu, L., Zhu, L., et al.: Adversarial graph convolutional network for cross-modal retrieval. IEEE Trans. Circuits Syst. Video Technol. 32(3), 1634\u20131645 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"3","key":"1760_CR9","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1093\/biomet\/28.3-4.321","volume":"28","author":"H Hotelling","year":"1936","unstructured":"Hotelling, H.: Relations between two sets of variates. Biometrika 28(3), 321\u2013377 (1936)","journal-title":"Biometrika"},{"key":"1760_CR10","doi-asserted-by":"crossref","unstructured":"Huiskes, M.J., Thomee, B., Lew, M.S.: The mir flickr retrieval evaluation initiative. In: Proceedings of the 1st ACM International Conference on Multimedia Information Retrieval, pp 39\u201343 (2008)","DOI":"10.1145\/1460096.1460104"},{"key":"1760_CR11","unstructured":"Jia, C., Yang, Y., Xia, Y., et\u00a0al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International conference on machine learning, PMLR, pp 4904\u20134916 (2021)"},{"key":"1760_CR12","doi-asserted-by":"crossref","unstructured":"Jiang, Q.Y., Li, W.J.: Deep cross-modal hashing. In: Proceeding of IEEE Conference on Computer Vision Pattern Recognition, pp 3232\u20133240 (2017)","DOI":"10.1109\/CVPR.2017.348"},{"issue":"1","key":"1760_CR13","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1109\/TPAMI.2015.2435740","volume":"38","author":"M Kan","year":"2015","unstructured":"Kan, M., Shan, S., Zhang, H., et al.: Multi-view discriminant analysis. IEEE Trans. Pattern Anal. Mach. Intell. 38(1), 188\u2013194 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1760_CR14","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. In: International Conference on Learning Representations, pp 1\u201314 (2017)"},{"key":"1760_CR15","doi-asserted-by":"crossref","unstructured":"Li, C., Deng, C., Li, N., et\u00a0al.: Self-supervised adversarial hashing networks for cross-modal retrieval. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 4242\u20134251 (2018)","DOI":"10.1109\/CVPR.2018.00446"},{"key":"1760_CR16","doi-asserted-by":"crossref","unstructured":"Li, D., Dimitrova, N., Li, M., et\u00a0al.: Multimedia content processing through cross-modal association. In: Proceedings of the Eleventh ACM International Conference on Multimedia, pp 604\u2013611 (2003)","DOI":"10.1145\/957013.957143"},{"key":"1760_CR17","unstructured":"Li, J., Li, D., Xiong, C., et\u00a0al.: BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In: Proceedings of the 39th International Conference on Machine Learning, Proceedings of Machine Learning Research, vol 162. PMLR, pp 12888\u201312900 (2022)"},{"issue":"7","key":"1760_CR18","doi-asserted-by":"publisher","first-page":"6530","DOI":"10.1109\/TCSVT.2024.3350695","volume":"34","author":"J Li","year":"2024","unstructured":"Li, J., Wong, W.K., Jiang, L., et al.: Ckdh: clip-based knowledge distillation hashing for cross-modal retrieval. IEEE Trans. Circuits Syst. Video Technol. 34(7), 6530\u20136541 (2024)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1760_CR19","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., et\u00a0al.: Microsoft coco: common objects in context. In: Proceedings of the European Conference on Computer Vision, pp 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1760_CR20","doi-asserted-by":"publisher","first-page":"3622","DOI":"10.1109\/TIP.2023.3286710","volume":"32","author":"C Liu","year":"2023","unstructured":"Liu, C., Zhang, Y., Wang, H., et al.: Efficient token-guided image-text retrieval with consistent multimodal contrastive training. IEEE Trans. Image Process. 32, 3622\u20133633 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"1760_CR21","doi-asserted-by":"crossref","unstructured":"Ma, Y., Wang, M., Lu, G., et\u00a0al.: Multi-label semantic sharing based on graph convolutional network for image-to-text retrieval. The Visual Computer, pp 1\u201314 (2024)","DOI":"10.1007\/s00371-024-03496-y"},{"key":"1760_CR22","unstructured":"Maas, A.L., Hannun, A.Y., Ng, A.Y.: Rectifier nonlinearities improve neural network acoustic models. In: Proceeding of the International Conference on Machine Learning, pp 1\u20136 (2013)"},{"issue":"1","key":"1760_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3284750","volume":"15","author":"Y Peng","year":"2019","unstructured":"Peng, Y., Qi, J.: Cm-gans: cross-modal generative adversarial networks for common representation learning. ACM Trans. Multimed. Comput. Commun. Appl. 15(1), 1\u201324 (2019)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"1760_CR24","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: Global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing, pp 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"1760_CR25","doi-asserted-by":"crossref","unstructured":"Qian, S., Xue, D., Zhang, H., et\u00a0al.: Dual adversarial graph neural networks for multi-label cross-modal retrieval. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 2440\u20132448 (2021)","DOI":"10.1609\/aaai.v35i3.16345"},{"key":"1760_CR26","doi-asserted-by":"publisher","first-page":"3520","DOI":"10.1109\/TMM.2021.3101642","volume":"24","author":"S Qian","year":"2022","unstructured":"Qian, S., Xue, D., Fang, Q., et al.: Adaptive label-aware graph convolutional networks for cross-modal retrieval. IIEEE Trans. Multimed. 24, 3520\u20133532 (2022)","journal-title":"IIEEE Trans. Multimed."},{"issue":"4","key":"1760_CR27","first-page":"4794","volume":"45","author":"S Qian","year":"2022","unstructured":"Qian, S., Xue, D., Fang, Q., et al.: Integrating multi-label contrastive learning with dual adversarial graph neural networks for cross-modal retrieval. IEEE Trans. Pattern Anal. Mach. Intell. 45(4), 4794\u20134811 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1760_CR28","doi-asserted-by":"publisher","first-page":"6361","DOI":"10.1109\/TMM.2023.3349075","volume":"26","author":"Q Qin","year":"2024","unstructured":"Qin, Q., Huo, Y., Huang, L., et al.: Deep neighborhood-preserving hashing with quadratic spherical mutual information for cross-modal retrieval. IEEE Trans. Multimed. 26, 6361\u20136374 (2024)","journal-title":"IEEE Trans. Multimed."},{"key":"1760_CR29","unstructured":"Radford, A., Kim, J.W., Hallacy, C., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, PMLR, pp 8748\u20138763 (2021)"},{"issue":"45","key":"1760_CR30","first-page":"1","volume":"1","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart, D.E., Hinton, G.E., Mcclelland, J.L.: A general framework for parallel distributed processing. Parallel Distrib. Process.: Explor. Microstruct. Cognit. 1(45), 1\u201326 (1986)","journal-title":"Parallel Distrib. Process.: Explor. Microstruct. Cognit."},{"key":"1760_CR31","doi-asserted-by":"crossref","unstructured":"Sharma, A., Kumar, A., Daume, H., et\u00a0al.: Generalized multiview analysis: a discriminative latent space. In: Proceeding of IEEE Conference on Computer Vision Pattern Recognition, pp 2160\u20132167 (2012)","DOI":"10.1109\/CVPR.2012.6247923"},{"key":"1760_CR32","doi-asserted-by":"crossref","unstructured":"Shen, X., Chen, Y., Liu, W., et\u00a0al.: Graph convolutional multi-label hashing for cross-modal retrieval. IEEE Transactions on Neural Networks and Learning Systems pp 1\u201313 (2024)","DOI":"10.2139\/ssrn.5071394"},{"key":"1760_CR33","doi-asserted-by":"crossref","unstructured":"Shen, Z., Zhai, D., Liu, X., et\u00a0al.: Semi-supervised graph convolutional hashing network for large-scale cross-modal retrieval. In: 2020 IEEE International Conference on Image Processing, pp 2366\u20132370 (2020)","DOI":"10.1109\/ICIP40778.2020.9190641"},{"key":"1760_CR34","unstructured":"Shotaro, A.: A kernel method for canonical correlation analysis. In: International Meeting of Psychometric Society, pp 263\u2013269 (2001)"},{"key":"1760_CR35","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arxiv preprint arxiv:1409.1556, 1409 (2014)"},{"key":"1760_CR36","doi-asserted-by":"crossref","unstructured":"Wang, B., Yang, Y., Xu, X., et\u00a0al.: Adversarial cross-modal retrieval. In: Proceedings of the ACM International Conference on Multimedia, pp 154\u2013162 (2017)","DOI":"10.1145\/3123266.3123326"},{"key":"1760_CR37","unstructured":"Wang, D., Gao, X., Wang, X., et\u00a0al.: Semantic topic multimodal hashing for cross-media retrieval. In: Twenty-fourth International Joint Conference on Artificial Intelligence, pp 3890\u20133896 (2015)"},{"key":"1760_CR38","doi-asserted-by":"crossref","unstructured":"Wang, K., He, R., Wang, W., et\u00a0al.: Learning coupled feature spaces for cross-modal matching. In: Proceedings of the IEEE International Conference on Computer Vision, pp 2088\u20132095 (2013)","DOI":"10.1109\/ICCV.2013.261"},{"key":"1760_CR39","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109211","volume":"136","author":"F Wu","year":"2023","unstructured":"Wu, F., Li, S., Gao, G., et al.: Semi-supervised cross-modal hashing via modality-specific and cross-modal graph convolutional networks. Pattern Recogn. 136, 109211 (2023)","journal-title":"Pattern Recogn."},{"issue":"11","key":"1760_CR40","doi-asserted-by":"publisher","first-page":"5835","DOI":"10.1109\/TKDE.2024.3419577","volume":"36","author":"Q Wu","year":"2024","unstructured":"Wu, Q., Zhang, Z., Liu, Y., et al.: Contrastive multi-bit collaborative learning for deep cross-modal hashing. IEEE Trans. Knowl. Data Eng. 36(11), 5835\u20135848 (2024)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"1760_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101968","volume":"100","author":"X Xia","year":"2023","unstructured":"Xia, X., Dong, G., Li, F., et al.: When clip meets cross-modal hashing retrieval: a new strong baseline. Inf. Fusion 100, 101968 (2023)","journal-title":"Inf. Fusion"},{"key":"1760_CR42","doi-asserted-by":"publisher","first-page":"3626","DOI":"10.1109\/TIP.2020.2963957","volume":"29","author":"D Xie","year":"2020","unstructured":"Xie, D., Deng, C., Li, C.: Multi-task consistency-preserving adversarial hashing for cross-modal retrieval. IEEE Trans. Image Process. 29, 3626\u20133637 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"1760_CR43","doi-asserted-by":"crossref","unstructured":"Xie, Y., Liu, Y., Wang, Y., et\u00a0al.: Label-attended hashing for multi-label image retrieval. In: International Joint Conference on Artificial Intelligence, pp 955\u2013962 (2020b)","DOI":"10.24963\/ijcai.2020\/133"},{"key":"1760_CR44","doi-asserted-by":"crossref","unstructured":"Xu, R., Li, C., Yan, J., et\u00a0al.: Graph convolutional network hashing for cross-modal retrieval. In: International Joint Conference on Artificial Intelligence, pp 982\u2013988 (2019)","DOI":"10.24963\/ijcai.2019\/138"},{"key":"1760_CR45","doi-asserted-by":"crossref","unstructured":"Yang, E., Deng, C., Liu, W., et\u00a0al.: Pairwise relationship guided deep hashing for cross-modal retrieval. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 1618\u20131625 (2017)","DOI":"10.1609\/aaai.v31i1.10719"},{"key":"1760_CR46","doi-asserted-by":"crossref","unstructured":"Yao, T., Mei, T., Ngo, C.W.: Learning query and image similarities with ranking canonical correlation analysis. In: Proceedings of the IEEE International Conference on Computer Vision, pp 28\u201336 (2015)","DOI":"10.1109\/ICCV.2015.12"},{"issue":"6","key":"1760_CR47","doi-asserted-by":"publisher","first-page":"965","DOI":"10.1109\/TCSVT.2013.2276704","volume":"24","author":"X Zhai","year":"2014","unstructured":"Zhai, X., Peng, Y., Xiao, J.: Learning cross-media joint representation with sparse and semisupervised regularization. IEEE Trans. Circuits Syst. Video Technol. 24(6), 965\u2013978 (2014)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1760_CR48","doi-asserted-by":"publisher","first-page":"9736","DOI":"10.1109\/TMM.2024.3397191","volume":"26","author":"C Zhan","year":"2024","unstructured":"Zhan, C., Zhang, Y., Lin, Y., et al.: Unidcp: unifying multiple medical vision-language tasks via dynamic cross-modal learnable prompts. IEEE Trans. Multimed. 26, 9736\u20139748 (2024)","journal-title":"IEEE Trans. Multimed."},{"key":"1760_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, D., Li, W.J.: Large-scale supervised multimodal hashing with semantic correlation maximization. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 2177\u20132183 (2014)","DOI":"10.1609\/aaai.v28i1.8995"},{"key":"1760_CR50","doi-asserted-by":"publisher","first-page":"466","DOI":"10.1109\/TMM.2021.3053766","volume":"24","author":"PF Zhang","year":"2021","unstructured":"Zhang, P.F., Li, Y., Huang, Z., et al.: Aggregation-based graph convolutional hashing for unsupervised cross-modal retrieval. IEEE Trans. Multimed. 24, 466\u2013479 (2021)","journal-title":"IEEE Trans. Multimed."},{"key":"1760_CR51","doi-asserted-by":"crossref","unstructured":"Zhen, L., Hu, P., Wang, X., et\u00a0al.: Deep supervised cross-modal retrieval. In: Proceeding of IEEE Conference on Computer Vision Pattern Recognition, pp 10394\u201310403 (2019)","DOI":"10.1109\/CVPR.2019.01064"},{"key":"1760_CR52","unstructured":"Zhen, Y., Yeung, D.Y.: Co-regularized hashing for multimodal data. In: Proceedings of the International Conference on Neural Information Processing Systems, pp 1376\u20131384 (2012)"},{"key":"1760_CR53","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2024.121279","volume":"682","author":"J Zhu","year":"2024","unstructured":"Zhu, J., Zhang, H., Chen, J., et al.: Adaptive multi-label structure preserving network for cross-modal retrieval. Inf. Sci. 682, 121279 (2024)","journal-title":"Inf. Sci."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01760-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01760-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01760-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,15]],"date-time":"2025-09-15T09:01:31Z","timestamp":1757926891000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01760-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,6]]},"references-count":53,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["1760"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01760-2","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2025,5,6]]},"assertion":[{"value":"3 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"233"}}