{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T11:27:18Z","timestamp":1770463638398,"version":"3.49.0"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"16","license":[{"start":{"date-parts":[[2023,5,9]],"date-time":"2023-05-09T00:00:00Z","timestamp":1683590400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,9]],"date-time":"2023-05-09T00:00:00Z","timestamp":1683590400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Cutting-Edge Technologies Research Program of Henan Province","award":["212102210540, 212102210547"],"award-info":[{"award-number":["212102210540, 212102210547"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s11227-023-05318-9","type":"journal-article","created":{"date-parts":[[2023,5,10]],"date-time":"2023-05-10T21:54:11Z","timestamp":1683755651000},"page":"17810-17823","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Text-image matching for multi-model machine translation"],"prefix":"10.1007","volume":"79","author":[{"given":"Xiayang","family":"Shi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenqiang","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuhui","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yijun","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yufeng","family":"Niu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,5,9]]},"reference":[{"key":"5318_CR1","doi-asserted-by":"crossref","unstructured":"Elliott D, Frank S, l Sima\u2019an K, Specia L (2016) Multi30k: multilingual English\u2013German image descriptions. arXiv preprint arXiv:1605.00459","DOI":"10.18653\/v1\/W16-3210"},{"key":"5318_CR2","unstructured":"Bahdanau D, Cho K, Bengio Y (2014) Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473"},{"key":"5318_CR3","doi-asserted-by":"crossref","unstructured":"Gehring J, Auli M, Grangier D, Dauphin YN (2016) A convolutional encoder model for neural machine translation. arXiv preprint arXiv:1611.02344","DOI":"10.18653\/v1\/P17-1012"},{"key":"5318_CR4","unstructured":"Zhang Z, Chen K, Wang R, Utiyama M, Sumita E, Li Z, Zhao H (2019) Neural machine translation with universal visual representation. In: International Conference on Learning Representations"},{"key":"5318_CR5","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick C (2014) Lawrence Microsoft coco: common objects in context. In: European conference on computer vision, Springer, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"5318_CR6","doi-asserted-by":"crossref","unstructured":"Huang P-Y, Liu F, Shiang S-R, Oh J, Dyer C (2016) Attention-based multimodal neural machine translation. In: Proceedings of the First Conference on Machine Translation: Volume 2, Shared Task Papers, pp 639\u2013645","DOI":"10.18653\/v1\/W16-2360"},{"key":"5318_CR7","doi-asserted-by":"crossref","unstructured":"Calixto I, Liu Q, Campbell N (2017) Incorporating global visual features into attention-based neural machine translation. arXiv preprint arXiv:1701.06521","DOI":"10.18653\/v1\/D17-1105"},{"key":"5318_CR8","unstructured":"Caglayan O, Barrault L, Bougares F (2016) Multimodal attention for neural machine translation. arXiv preprint arXiv:1609.03976"},{"key":"5318_CR9","unstructured":"Delbrouck J-B, Dupont S (2017) Multimodal compact bilinear pooling for multimodal neural machine translation. arXiv preprint arXiv:1703.08084"},{"key":"5318_CR10","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1016\/j.ins.2020.11.024","volume":"554","author":"S Jinsong","year":"2021","unstructured":"Jinsong S, Chen J, Jiang H, Zhou C, Lin H, Ge Y, Qingqiang W, Lai Y (2021) Multi-modal neural machine translation with deep semantic interactions. Inf Sci 554:47\u201360","journal-title":"Inf Sci"},{"key":"5318_CR11","doi-asserted-by":"crossref","unstructured":"Lin H, Meng F, Su J, Yin Y, Yang Z, Ge Y, Zhou J, Luo J (2020) Dynamic context-guided capsule network for multimodal machine translation. In: Proceedings of the 28th ACM International Conference on Multimedia, pp 1320\u20131329","DOI":"10.1145\/3394171.3413715"},{"key":"5318_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2021.12.076","volume":"476","author":"Y Zhao","year":"2022","unstructured":"Zhao Y, Komachi M, Kajiwara T, Chu C (2022) Region-attentive multimodal neural machine translation. Neurocomputing 476:1\u201313","journal-title":"Neurocomputing"},{"key":"5318_CR13","doi-asserted-by":"crossref","unstructured":"Ive J, Madhyastha P, Specia L (2019) Distilling translations with visual awareness. arXiv preprint arXiv:1906.07701","DOI":"10.18653\/v1\/P19-1653"},{"key":"5318_CR14","doi-asserted-by":"crossref","unstructured":"Zhang J, Utiyama M, Sumita E, Neubig G, Nakamura S (2017) Nict-naist system for wmt17 multimodal translation task. In: Proceedings of the Second Conference on Machine Translation, pp 477\u20134820","DOI":"10.18653\/v1\/W17-4753"},{"key":"5318_CR15","doi-asserted-by":"crossref","unstructured":"Gr\u00f6nroos S-A, Huet B, Kurimo M, Laaksonen J, Merialdo B, Pham P, Sj\u00f6berg M, Sulubacak U, Tiedemann J, Troncy R, et\u00a0al (2018) The memad submission to the wmt18 multimodal translation task. arXiv preprint arXiv:1808.10802","DOI":"10.18653\/v1\/W18-6439"},{"key":"5318_CR16","doi-asserted-by":"crossref","unstructured":"Zhou M, Cheng R, Lee YJ, Yu Z (2018) A visual attention grounding neural model for multimodal machine translation. arXiv preprint arXiv:1808.08266","DOI":"10.18653\/v1\/D18-1400"},{"key":"5318_CR17","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1109\/TASLP.2021.3138719","volume":"30","author":"Y Zhao","year":"2021","unstructured":"Zhao Y, Komachi M, Kajiwara T, Chu C (2021) Word-region alignment-guided multimodal neural machine translation. IEEE\/ACM Trans Audio Speech Lang Process 30:244\u2013259","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"5318_CR18","doi-asserted-by":"crossref","unstructured":"Ji B, Zhang T, Zou Y, Hu B, Shen S (2022) Increasing visual awareness in multimodal neural machine translation from an information theoretic perspective. arXiv preprint arXiv:2210.08478","DOI":"10.18653\/v1\/2022.emnlp-main.453"},{"key":"5318_CR19","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"5318_CR20","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J, et\u00a0al (2021) Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp 8748\u20138763"},{"key":"5318_CR21","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2018) Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"5318_CR22","unstructured":"Hendrycks D, Gimpel K (2016) Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415"},{"issue":"1","key":"5318_CR23","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s11263-014-0748-y","volume":"119","author":"X Jianxiong","year":"2016","unstructured":"Jianxiong X, Ehinger KA, James H, Antonio T, Aude O (2016) Sun database: Exploring a large collection of scene categories. Int J Comput Vis 119(1):3\u201322","journal-title":"Int J Comput Vis"},{"issue":"1","key":"5318_CR24","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna R, Zhu Y, Groth O, Johnson J, Hata K, Kravitz J, Chen S, Kalantidis Y, Li L-J, Shamma DA et al (2017) Visual genome: connecting language and vision using crowdsourced dense image annotations. Int J Comput Vis 123(1):32\u201373","journal-title":"Int J Comput Vis"},{"key":"5318_CR25","doi-asserted-by":"crossref","unstructured":"Benenson R, Popov S, Ferrari V (2019) Large-scale interactive object segmentation with human annotators. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 11700\u201311709","DOI":"10.1109\/CVPR.2019.01197"},{"key":"5318_CR26","doi-asserted-by":"crossref","unstructured":"Gain B, Bandyopadhyay D, Ekbal A (2021) IITP at WAT 2021: System description for English\u2013Hindi multimodal translation task. In: Proceedings of the 8th Workshop on Asian Translation (WAT2021)","DOI":"10.18653\/v1\/2021.wat-1.18"},{"key":"5318_CR27","doi-asserted-by":"crossref","unstructured":"Ott M, Edunov S, Baevski A, Fan A, Gross S, Ng N, Grangier D, Auli M (2019) fairseq: a fast, extensible toolkit for sequence modeling. arXiv preprint arXiv:1904.01038","DOI":"10.18653\/v1\/N19-4009"},{"key":"5318_CR28","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"5318_CR29","doi-asserted-by":"crossref","unstructured":"Yue X, Sun S, Kuang Z, Wei M, Torr PHS, Zhang W, Lin D (2021) Vision transformer with progressive sampling. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 387\u2013396","DOI":"10.1109\/ICCV48922.2021.00044"},{"key":"5318_CR30","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"5318_CR31","doi-asserted-by":"crossref","unstructured":"Zhang Z (2018) Improved adam optimizer for deep neural networks. In: 2018 IEEE\/ACM 26th International Symposium on Quality of Service (IWQoS), pp 1\u20132","DOI":"10.1109\/IWQoS.2018.8624183"},{"key":"5318_CR32","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu W-J (2002) Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting of the Association for Computational Linguistics, pp 311\u2013318","DOI":"10.3115\/1073083.1073135"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-023-05318-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-023-05318-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-023-05318-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,18]],"date-time":"2023-09-18T08:03:35Z","timestamp":1695024215000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-023-05318-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,9]]},"references-count":32,"journal-issue":{"issue":"16","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["5318"],"URL":"https:\/\/doi.org\/10.1007\/s11227-023-05318-9","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5,9]]},"assertion":[{"value":"17 April 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 May 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflicts of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Our experimental research does not involve any human or animal subjects and does not involve ethical issues.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}