{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T21:47:18Z","timestamp":1772833638831,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,12,2]],"date-time":"2023-12-02T00:00:00Z","timestamp":1701475200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,2]],"date-time":"2023-12-02T00:00:00Z","timestamp":1701475200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2024,2]]},"DOI":"10.1007\/s11704-023-3186-6","type":"journal-article","created":{"date-parts":[[2023,12,2]],"date-time":"2023-12-02T02:02:17Z","timestamp":1701482537000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Alignment efficient image-sentence retrieval considering transferable cross-modal representation learning"],"prefix":"10.1007","volume":"18","author":[{"given":"Yang","family":"Yang","sequence":"first","affiliation":[]},{"given":"Jinyi","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Guangyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Lanyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Wenjie","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,2]]},"reference":[{"issue":"2","key":"3186_CR1","doi-asserted-by":"crossref","first-page":"120112","DOI":"10.1007\/s11432-019-2721-0","volume":"63","author":"Z Wang","year":"2020","unstructured":"Wang Z, Liu X, Lin J, Yang C, Li H. Multi-attention based cross-domain beauty product image retrieval. Science China Information Sciences, 2020, 63(2): 120112","journal-title":"Science China Information Sciences"},{"key":"3186_CR2","unstructured":"Wang K, Yin Q, Wang W, Wu S, Wang L. A comprehensive survey on cross-modal retrieval. 2016, arXiv preprint arXiv: 1607.06215"},{"issue":"4","key":"3186_CR3","doi-asserted-by":"crossref","first-page":"921","DOI":"10.1007\/s11263-020-01392-1","volume":"129","author":"Y Peng","year":"2021","unstructured":"Peng Y, Qi J, Ye Z, Zhuo Y. Hierarchical visual-textual knowledge distillation for life-long correlation learning. International Journal of Computer Vision, 2021, 129(4): 921\u2013941","journal-title":"International Journal of Computer Vision"},{"issue":"3","key":"3186_CR4","first-page":"489","volume":"16","author":"Y Liu","year":"2022","unstructured":"Liu Y, Guo Y Y, Fang J, Fan J L, Hao Y, Liu J M. Survey of research on deep learning image-text cross-modal retrieval. Journal of Frontiers of Computer Science & Technology, 2022, 16(3): 489\u2013511","journal-title":"Journal of Frontiers of Computer Science & Technology"},{"key":"3186_CR5","doi-asserted-by":"crossref","unstructured":"Chi J, Peng Y. Dual adversarial networks for zero-shot cross-media retrieval. In: Proceedings of the 27th International Joint Conference on Artificial Intelligence. 2018, 663\u2013669","DOI":"10.24963\/ijcai.2018\/92"},{"key":"3186_CR6","doi-asserted-by":"crossref","unstructured":"Zhen L, Hu P, Wang X, Peng D. Deep supervised cross-modal retrieval. In: Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019, 10386\u201310395","DOI":"10.1109\/CVPR.2019.01064"},{"issue":"10","key":"3186_CR7","doi-asserted-by":"crossref","first-page":"4540","DOI":"10.1109\/TIP.2016.2592800","volume":"25","author":"D Wang","year":"2016","unstructured":"Wang D, Gao X, Wang X, He L, Yuan B. Multimodal discriminative binary embedding for large-scale cross-modal retrieval. IEEE Transactions on Image Processing, 2016, 25(10): 4540\u20134554","journal-title":"IEEE Transactions on Image Processing"},{"issue":"9","key":"3186_CR8","doi-asserted-by":"crossref","first-page":"092104","DOI":"10.1007\/s11432-015-0902-2","volume":"60","author":"W Qu","year":"2017","unstructured":"Qu W, Wang D, Feng S, Zhang Y, Yu G. A novel cross-modal hashing algorithm based on multimodal deep learning. Science China Information Sciences, 2017, 60(9): 092104","journal-title":"Science China Information Sciences"},{"key":"3186_CR9","doi-asserted-by":"crossref","unstructured":"Wang Z, Liu X, Li H, Sheng L, Yan J, Wang X, Shao J. CAMP: cross-modal adaptive message passing for text-image retrieval. In: Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision. 2019, 5763\u20135772","DOI":"10.1109\/ICCV.2019.00586"},{"key":"3186_CR10","doi-asserted-by":"crossref","unstructured":"Lee K H, Chen X, Hua G, Hu H, He X. Stacked cross attention for image-text matching. In: Proceedings of the 15th European Conference on Computer Vision. 2018, 212\u2013228","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"3186_CR11","doi-asserted-by":"crossref","unstructured":"Zhang Y, Lu H. Deep cross-modal projection learning for image-text matching. In: Proceedings of the 15th European Conference on Computer Vision. 2018, 707\u2013723","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"3186_CR12","doi-asserted-by":"crossref","unstructured":"Yu F, Tang J, Yin W, Sun Y, Tian H, Wu H, Wang H. ERNIE-ViL: Knowledge enhanced vision-language representations through scene graphs. In: Proceedings of AAAI Conference on Artificial Intelligence. 2021, 3208\u20133216","DOI":"10.1609\/aaai.v35i4.16431"},{"key":"3186_CR13","doi-asserted-by":"crossref","first-page":"2728","DOI":"10.1109\/TIP.2019.2952085","volume":"29","author":"Y Peng","year":"2020","unstructured":"Peng Y, Qi J, Zhuo Y. MAVA: multi-level adaptive visual-textual alignment by cross-media bi-attention mechanism. IEEE Transactions on Image Processing, 2020, 29: 2728\u20132741","journal-title":"IEEE Transactions on Image Processing"},{"issue":"2","key":"3186_CR14","doi-asserted-by":"crossref","first-page":"1086","DOI":"10.1109\/TCYB.2020.2985716","volume":"52","author":"Z Ji","year":"2022","unstructured":"Ji Z, Wang H, Han J, Pang Y. SMAN: stacked multimodal attention network for cross-modal image-text retrieval. IEEE Transactions on Cybernetics, 2022, 52(2): 1086\u20131097","journal-title":"IEEE Transactions on Cybernetics"},{"key":"3186_CR15","unstructured":"Frome A, Corrado G S, Shlens J, Bengio S, Dean J, Ranzato M, Mikolov T. DeViSE: a deep visual-semantic embedding model. In: Proceedings of the 26th International Conference on Neural Information Processing Systems. 2013, 2121\u20132129"},{"key":"3186_CR16","doi-asserted-by":"crossref","unstructured":"Song G, Tan X. Sequential learning for cross-modal retrieval. In: Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision Workshop. 2019, 4531\u20134539","DOI":"10.1109\/ICCVW.2019.00554"},{"key":"3186_CR17","doi-asserted-by":"crossref","unstructured":"Feng Y, Ma L, Liu W, Luo J. Unsupervised image captioning. In: Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019, 4120\u20134129","DOI":"10.1109\/CVPR.2019.00425"},{"key":"3186_CR18","doi-asserted-by":"crossref","unstructured":"Gu J, Joty S R, Cai J, Zhao H, Yang X, Wang G. Unpaired image captioning via scene graph alignments. In: Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision. 2019, 10322\u201310331","DOI":"10.1109\/ICCV.2019.01042"},{"key":"3186_CR19","doi-asserted-by":"crossref","unstructured":"Huang P Y, Kang G, Liu W, Chang X, Hauptmann A G. Annotation efficient cross-modal retrieval with adversarial attentive alignment. In: Proceedings of the 27th ACM International Conference on Multimedia. 2019, 1758\u20131767","DOI":"10.1145\/3343031.3350894"},{"issue":"2","key":"3186_CR20","doi-asserted-by":"crossref","first-page":"798","DOI":"10.1109\/TNNLS.2020.3029181","volume":"33","author":"L Zhen","year":"2020","unstructured":"Zhen L, Hu P, Peng X, Goh R S M, Zhou J T. Deep multimodal transfer learning for cross-modal retrieval. IEEE Transactions on Neural Networks and Learning Systems, 2020, 33(2): 798\u2013810","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"3186_CR21","doi-asserted-by":"crossref","unstructured":"Chen Q, Liu Y, Albanie S. Mind-the-gap! Unsupervised domain adaptation for text-video retrieval. In: Proceedings of AAAI Conference on Artificial Intelligence. 2021, 1072\u20131080","DOI":"10.1609\/aaai.v35i2.16192"},{"key":"3186_CR22","doi-asserted-by":"crossref","first-page":"1180","DOI":"10.1109\/TIP.2020.3042086","volume":"30","author":"W Zhao","year":"2021","unstructured":"Zhao W, Wu X, Luo J. Cross-domain image captioning via cross-modal retrieval and model adaptation. IEEE Transactions on Image Processing, 2021, 30: 1180\u20131192","journal-title":"IEEE Transactions on Image Processing"},{"key":"3186_CR23","doi-asserted-by":"crossref","first-page":"503","DOI":"10.1162\/tacl_a_00473","volume":"10","author":"G Geigle","year":"2022","unstructured":"Geigle G, Pfeiffer J, Reimers N, Vuli\u0107 I, Gurevych I. Retrieve fast, Rerank smart: cooperative and joint approaches for improved cross-modal retrieval. Transactions of the Association for Computational Linguistics, 2022, 10: 503\u2013521","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"3186_CR24","doi-asserted-by":"crossref","unstructured":"Yang Y, Zhang C, Xu Y C, Yu D, Zhan D C, Yang J. Rethinking label-wise cross-modal retrieval from A semantic sharing perspective. In: Proceedings of the 30th International Joint Conference on Artificial Intelligence. 2021, 3300\u20133306","DOI":"10.24963\/ijcai.2021\/454"},{"issue":"2","key":"3186_CR25","doi-asserted-by":"crossref","first-page":"199","DOI":"10.1109\/TNN.2010.2091281","volume":"22","author":"S J Pan","year":"2011","unstructured":"Pan S J, Tsang I W, Kwok J T, Yang Q. Domain adaptation via transfer component analysis. IEEE Transactions on Neural Networks, 2011, 22(2): 199\u2013210","journal-title":"IEEE Transactions on Neural Networks"},{"key":"3186_CR26","unstructured":"Scott T R, Ridgeway K, Mozer M C. Adapted deep embeddings: A synthesis of methods for k-shot inductive transfer learning. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems. 2018, 76\u201385"},{"issue":"5","key":"3186_CR27","doi-asserted-by":"crossref","first-page":"165323","DOI":"10.1007\/s11704-021-1010-8","volume":"16","author":"Y Wang","year":"2022","unstructured":"Wang Y, Wang C, Xue H, Chen S. Self-corrected unsupervised domain adaptation. Frontiers of Computer Science, 2022, 16(5): 165323","journal-title":"Frontiers of Computer Science"},{"key":"3186_CR28","unstructured":"Yosinski J, Clune J, Bengio Y, Lipson H. How transferable are features in deep neural networks? In: Proceedings of the 27th International Conference on Neural Information Processing Systems. 2014, 3320\u20133328"},{"issue":"1","key":"3186_CR29","first-page":"2096","volume":"17","author":"Y Ganin","year":"2016","unstructured":"Ganin Y, Ustinova E, Ajakan H, Germain P, Larochelle H, Laviolette F, Marchand M, Lempitsky V. Domain-adversarial training of neural networks. The Journal of Machine Learning Research, 2016, 17(1): 2096\u20132030","journal-title":"The Journal of Machine Learning Research"},{"key":"3186_CR30","unstructured":"Long M, Cao Z, Wang J, Jordan M I. Conditional adversarial domain adaptation. In: Proceedings of the 32nd International Conference on Neural Information Processing Systems. 2018, 1647\u20131657"},{"key":"3186_CR31","unstructured":"Yao Z, Wang Y, Long M, Wang J. Unsupervised transfer learning for spatiotemporal predictive networks. In: Proceedings of the 37th International Conference on Machine Learning. 2020, 999"},{"key":"3186_CR32","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L J, Li K, Fei-Fei L. ImageNet: a large-scale hierarchical image database. In: Proceedings of 2009 IEEE Conference on Computer Vision and Pattern Recognition. 2009, 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"4","key":"3186_CR33","doi-asserted-by":"crossref","first-page":"664","DOI":"10.1109\/TPAMI.2016.2598339","volume":"39","author":"A Karpathy","year":"2017","unstructured":"Karpathy A, Fei-Fei L. Deep visual-semantic alignments for generating image descriptions. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2017, 39(4): 664\u2013676","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"3186_CR34","unstructured":"Kiros R, Salakhutdinov R, Zemel R S. Unifying visual-semantic embeddings with multimodal neural language models. 2014, arXiv preprint arXiv: 1411.2539"},{"key":"3186_CR35","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1162\/tacl_a_00177","volume":"2","author":"R Socher","year":"2014","unstructured":"Socher R, Karpathy A, Le Q V, Manning C D, Ng A Y. Grounded compositional semantics for finding and describing images with sentences. Transactions of the Association for Computational Linguistics, 2014, 2: 207\u2013218","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"3186_CR36","unstructured":"Faghri F, Fleet D J, Kiros J R, Fidler S. VSE++: improving visual-semantic embeddings with hard negatives. In: Proceedings of the British Machine Vision Conference 2018. 2018, 12"},{"key":"3186_CR37","doi-asserted-by":"crossref","unstructured":"Diao H, Zhang Y, Ma L, Lu H. Similarity reasoning and filtration for image-text matching. In: Proceedings of the AAAI Conference on Artificial Intelligence. 2021, 1218\u20131226","DOI":"10.1609\/aaai.v35i2.16209"},{"key":"3186_CR38","doi-asserted-by":"crossref","unstructured":"Tzeng E, Hoffman J, Saenko K, Darrell T. Adversarial discriminative domain adaptation. In: Proceedings of 2017 IEEE Conference on Computer Vision and Pattern Recognition. 2017, 2962\u20132971","DOI":"10.1109\/CVPR.2017.316"},{"key":"3186_CR39","unstructured":"Luo Z, Zou Y, Hoffman J, Fei-Fei L. Label efficient learning of transferable representations across domains and tasks. In: Proceedings of the 31st International Conference on Neural Information Processing Systems. 2017, 165\u2013177"},{"key":"3186_CR40","unstructured":"Goodfellow I J, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A C, Bengio Y. Generative adversarial nets. In: Proceedings of the 27th International Conference on Neural Information Processing Systems. 2014, 2672\u20132680"},{"key":"3186_CR41","doi-asserted-by":"crossref","first-page":"173","DOI":"10.1007\/978-3-319-58347-1_9","volume-title":"Domain Adaptation in Computer Vision Applications","author":"J Hoffman","year":"2017","unstructured":"Hoffman J, Tzeng E, Darrell T, Saenko K. Simultaneous deep transfer across domains and tasks. In: Csurka G, ed. Domain Adaptation in Computer Vision Applications. Cham: Springer, 2017, 173\u2013187"},{"issue":"1","key":"3186_CR42","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1109\/JPROC.2020.3004555","volume":"109","author":"F Zhuang","year":"2021","unstructured":"Zhuang F, Qi Z, Duan K, Xi D, Zhu Y, Zhu H, Xiong H, He Q. A comprehensive survey on transfer learning. Proceedings of the IEEE, 2021, 109(1): 43\u201376","journal-title":"Proceedings of the IEEE"},{"key":"3186_CR43","doi-asserted-by":"crossref","unstructured":"Huiskes M J, Lew M S. The MIR flickr retrieval evaluation. In: Proceedings of the 1st ACM International Conference on Multimedia Information Retrieval. 2008, 39\u201343","DOI":"10.1145\/1460096.1460104"},{"key":"3186_CR44","doi-asserted-by":"crossref","unstructured":"Lin T, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick C L. Microsoft COCO: common objects in context. In: Proceedings of the 13th European Conference on Computer Vision. 2014, 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"3186_CR45","doi-asserted-by":"crossref","first-page":"162","DOI":"10.1007\/978-1-4612-4380-9_14","volume-title":"Breakthroughs in Statistics: Methodology and Distribution","author":"H Hotelling","year":"1992","unstructured":"Hotelling H. Relations between two sets of variates. In: Kotz S, Johnson N L, eds. Breakthroughs in Statistics: Methodology and Distribution. New York: Springer, 1992, 162\u2013190"},{"key":"3186_CR46","unstructured":"Andrew G, Arora R, Bilmes J A, Livescu K. Deep canonical correlation analysis. In: Proceedings of the 30th International Conference on Machine Learning. 2013, 1247\u20131255"},{"key":"3186_CR47","doi-asserted-by":"crossref","unstructured":"Zhang J, Peng Y, Yuan M. Unsupervised generative adversarial cross-modal hashing. In: Proceedings of AAAI Conference on Artificial Intelligence. 2018, 539\u2013546","DOI":"10.1609\/aaai.v32i1.11263"},{"key":"3186_CR48","doi-asserted-by":"crossref","unstructured":"Chen H, Ding G, Liu X, Lin Z, Liu J, Han J. IMRAM: iterative matching with recurrent attention memory for cross-modal image-text retrieval. In: Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020, 12652\u201312660","DOI":"10.1109\/CVPR42600.2020.01267"},{"key":"3186_CR49","doi-asserted-by":"publisher","unstructured":"Peng S J, He Y, Liu X, Cheung Y M, Xu X, Cui Z. Relation-aggregated cross-graph correlation learning for fine-grained image\u2013text retrieval. IEEE Transactions on Neural Networks and Learning Systems, 2022, doi: https:\/\/doi.org\/10.1109\/TNNLS.2022.3188569","DOI":"10.1109\/TNNLS.2022.3188569"},{"issue":"5","key":"3186_CR50","doi-asserted-by":"crossref","first-page":"3669","DOI":"10.1109\/TCYB.2020.3015084","volume":"52","author":"Y Peng","year":"2022","unstructured":"Peng Y, Ye Z, Qi J, Zhuo Y. Unsupervised visual-textual correlation learning with fine-grained semantic alignment. IEEE Transactions on Cybernetics, 2022, 52(5): 3669\u20133683","journal-title":"IEEE Transactions on Cybernetics"},{"key":"3186_CR51","doi-asserted-by":"crossref","unstructured":"Saito K, Kim D, Sclaroff S, Darrell T, Saenko K. Semi-supervised domain adaptation via minimax entropy. In: Proceedings of 2019 IEEE\/CVF International Conference on Computer Vision. 2019, 8049\u20138057","DOI":"10.1109\/ICCV.2019.00814"},{"key":"3186_CR52","unstructured":"Kingma D P, Ba J. Adam: A method for stochastic optimization. In: Proceedings of the 3rd International Conference on Learning Representations. 2015"},{"issue":"1","key":"3186_CR53","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1109\/18.61115","volume":"37","author":"J Lin","year":"1991","unstructured":"Lin J. Divergence measures based on the Shannon entropy. IEEE Transactions on Information Theory, 1991, 37(1): 145\u2013151","journal-title":"IEEE Transactions on Information Theory"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-023-3186-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-023-3186-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-023-3186-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,23]],"date-time":"2025-03-23T02:21:35Z","timestamp":1742696495000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-023-3186-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,2]]},"references-count":53,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,2]]}},"alternative-id":["3186"],"URL":"https:\/\/doi.org\/10.1007\/s11704-023-3186-6","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12,2]]},"assertion":[{"value":"3 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 July 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 December 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Competing interests The authors declare that they have no competing interests or financial conflicts to disclose.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics"}}],"article-number":"181335"}}