{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T19:07:40Z","timestamp":1769281660798,"version":"3.49.0"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"16","license":[{"start":{"date-parts":[[2023,11,2]],"date-time":"2023-11-02T00:00:00Z","timestamp":1698883200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,2]],"date-time":"2023-11-02T00:00:00Z","timestamp":1698883200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-17479-z","type":"journal-article","created":{"date-parts":[[2023,11,2]],"date-time":"2023-11-02T05:07:00Z","timestamp":1698901620000},"page":"48377-48397","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Pre-training neural machine translation with alignment information via optimal transport"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1306-8453","authenticated-orcid":false,"given":"Xueping","family":"Su","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xingkai","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunhong","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matthias","family":"R\u00e4tsch","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,2]]},"reference":[{"key":"17479_CR1","doi-asserted-by":"crossref","unstructured":"Cho K, van Merrienboer B, Gulcehre C et al (2014) Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: Proceedings of the 2014 conference on empirical methods in natural language processing (emnlp).\u00a0association for computational linguistics 1724","DOI":"10.3115\/v1\/D14-1179"},{"issue":"25","key":"17479_CR2","doi-asserted-by":"publisher","first-page":"33701","DOI":"10.1007\/s11042-021-11345-6","volume":"80","author":"M Bansal","year":"2021","unstructured":"Bansal M, Lobiyal DK (2021) Multilingual sequence to sequence convolutional machine translation. Multimed Tools Appl 80(25):33701\u201333726","journal-title":"Multimed Tools Appl"},{"key":"17479_CR3","doi-asserted-by":"crossref","unstructured":"Chen Y, Kedzie C, Nair S et al (2021) Cross-language sentence selection via data augmentation and rationale training. In: Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing, vol 1: Long Papers, pp 3881\u20133895","DOI":"10.18653\/v1\/2021.acl-long.300"},{"key":"17479_CR4","first-page":"489","volume":"2018","author":"S Edunov","year":"2018","unstructured":"Edunov S, Ott M, Auli M et al (2018) Understanding Back-Translation at Scale. Proc Conf Empir Methods Nat Lang Process 2018:489\u2013500","journal-title":"Proc Conf Empir Methods Nat Lang Process"},{"issue":"4","key":"17479_CR5","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1080\/09540099108946592","volume":"3","author":"L Chrisman","year":"1991","unstructured":"Chrisman L (1991) Learning recursive distributed representations for holistic computation[J]. Connect Sci 3(4):345\u2013366","journal-title":"Connect Sci"},{"key":"17479_CR6","first-page":"1700","volume":"2013","author":"N Kalchbrenner","year":"2013","unstructured":"Kalchbrenner N, Blunsom P (2013) Recurrent continuous translation models. Proc Conf Empir Methods Nat Lang Process (EMNLP) 2013:1700\u20131709","journal-title":"Proc Conf Empir Methods Nat Lang Process (EMNLP)"},{"issue":"3","key":"17479_CR7","first-page":"100","volume":"9","author":"D Bahdanau","year":"2014","unstructured":"Bahdanau D, Cho K, Bengio Y (2014) Neural machine translation by jointly learning to align and translate. Computer Science 9(3):100\u2013121","journal-title":"Computer Science"},{"key":"17479_CR8","unstructured":"Vaswani A, Shazeer N, Parmar N et al (2017) Attention is all you need. In: Proceedings of the 31st international conference on neural information processing systems, pp 6000\u20136010"},{"key":"17479_CR9","doi-asserted-by":"crossref","unstructured":"Chen M X, Firat O, Bapna A et al (2018) The best of both worlds: Combining recent advances in neural machine translation. In: Proceedings of the 56th annual meeting of the association for computational linguistics, vol 1: Long Papers,\u00a0pp\u00a076\u201386","DOI":"10.18653\/v1\/P18-1008"},{"key":"17479_CR10","doi-asserted-by":"publisher","first-page":"1574","DOI":"10.1109\/TASLP.2020.2995270","volume":"28","author":"Y Fan","year":"2020","unstructured":"Fan Y, Tian F, Xia Y et al (2020) Searching better architectures for neural machine translation. IEEE\/ACM Trans Audio Speech Lang Process 28:1574\u20131585","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"17479_CR11","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1109\/TASLP.2021.3138714","volume":"30","author":"K Chen","year":"2021","unstructured":"Chen K, Wang R, Utiyama M et al (2021) Integrating prior translation knowledge into neural machine translation. IEEE\/ACM Trans Audio Speech Lang Process 30:330\u2013339","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"17479_CR12","doi-asserted-by":"publisher","first-page":"2829","DOI":"10.1109\/TASLP.2021.3105798","volume":"29","author":"C Leong","year":"2021","unstructured":"Leong C, Liu X, Wong DF et al (2021) Exploiting translation model for parallel corpus mining. IEEE\/ACM Trans Audio Speech Lang Process 29:2829\u20132839","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"17479_CR13","doi-asserted-by":"publisher","first-page":"1864","DOI":"10.1109\/TASLP.2020.2999724","volume":"28","author":"H Li","year":"2020","unstructured":"Li H, Huang G, Cai D et al (2020) Neural machine translation with noisy lexical constraints. IEEE\/ACM Trans Audio Speech Lang Process 28:1864\u20131874","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"17479_CR14","doi-asserted-by":"crossref","unstructured":"Imamura K, Fujita A, Sumita E (2018) Enhancement of encoder and attention using target monolingual corpora in neural machine translation. In: Proceedings of the 2nd workshop on neural machine translation and generation, pp 55\u201363","DOI":"10.18653\/v1\/W18-2707"},{"key":"17479_CR15","doi-asserted-by":"publisher","first-page":"2649","DOI":"10.18653\/v1\/2020.emnlp-main.210","volume":"2020","author":"Z Lin","year":"2020","unstructured":"Lin Z, Pan X, Wang M et al (2020) Pre-training Multilingual Neural Machine Translation by Leveraging Alignment Information. Proc Conf Emp Methods Nat Lang Process (EMNLP) 2020:2649\u20132663","journal-title":"Proc Conf Emp Methods Nat Lang Process (EMNLP)"},{"key":"17479_CR16","doi-asserted-by":"crossref","unstructured":"Pan X, Wang M, Wu L et al (2021) Contrastive learning for many-to-many multilingual neural machine translation. In: Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing, vol 1: Long Papers,\u00a0pp 244\u2013258","DOI":"10.18653\/v1\/2021.acl-long.21"},{"key":"17479_CR17","first-page":"452","volume":"2018","author":"S Kobayashi","year":"2018","unstructured":"Kobayashi S (2018) Contextual Augmentation: Data Augmentation by Words with Paradigmatic Relations. Proc Conf North Am Chapter Assoc Comput Linguist: Human Language Technologies (NAACL) 2018:452\u2013457","journal-title":"Proc Conf North Am Chapter Assoc Comput Linguist: Human Language Technologies (NAACL)"},{"key":"17479_CR18","unstructured":"Kenton JDMWC, Toutanova LK\u00a0(2019) BERT: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, pp 4171\u20134186"},{"key":"17479_CR19","first-page":"5926","volume":"2019","author":"K Song","year":"2019","unstructured":"Song K, Tan X, Qin T et al (2019) MASS: Masked Sequence to Sequence Pre-training for Language Generation. Proc Int Conf Learning Representations (ICLR) 2019:5926\u20135936","journal-title":"Proc Int Conf Learning Representations (ICLR)"},{"key":"17479_CR20","unstructured":"Conneau A, Lample G (2019) Cross-lingual language model pre-training. In: Proceedings of the 33rd international conference on neural information processing systems, pp 7059\u20137069"},{"issue":"05","key":"17479_CR21","first-page":"9378","volume":"34","author":"J Yang","year":"2020","unstructured":"Yang J, Wang M, Zhou H, Zhao C, Zhang W, Yu Y, Li L (2020) Towards making the most of bert in neural machine translation. Proc AAAI Conf Artif Intell 34(05):9378\u20139385","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"17479_CR22","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1162\/tacl_a_00343","volume":"8","author":"Y Liu","year":"2020","unstructured":"Liu Y, Gu J, Goyal N, Li X, Edunov S, Ghazvininejad M, Lewis M, Zettlemoyer L (2020) Multilingual denoising pre-training for neural machine translation. Trans Assoc Comput Linguist 8:726\u2013742","journal-title":"Trans Assoc Comput Linguist"},{"key":"17479_CR23","unstructured":"Peyr\u00e9 G, Cuturi M (2017) Computational optimal transport. In: Center for Research in economics and statistics working papers, pp 2017\u20132086"},{"key":"17479_CR24","doi-asserted-by":"crossref","unstructured":"Sennrich R, Haddow B, Birch A (2016) Neural machine translation of rare words with subword units. In: 54th annual meeting of the association for computational linguistics. Association for Computational Linguistics (ACL), pp 1715\u20131725","DOI":"10.18653\/v1\/P16-1162"},{"issue":"2","key":"17479_CR25","first-page":"164","volume":"2020","author":"N Kitaev","year":"2013","unstructured":"Kitaev N, Kaiser \u0141, Levskaya A (2013) Reformer: The efficient transformer. IComputer Science 2020(2):164\u2013168","journal-title":"IComputer Science"},{"issue":"05","key":"17479_CR26","first-page":"8285","volume":"34","author":"Y Li","year":"2020","unstructured":"Li Y, Wang Q, Xiao T et al (2020) Neural machine translation with joint representation. Proc AAAI Conf Artif Intell 34(05):8285\u20138292","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"17479_CR27","doi-asserted-by":"crossref","unstructured":"Tompson J, Goroshin R, Jain A et al (2015) Efficient object localization using convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 648\u2013656","DOI":"10.1109\/CVPR.2015.7298664"},{"key":"17479_CR28","doi-asserted-by":"publisher","first-page":"1864","DOI":"10.1109\/TASLP.2020.2999724","volume":"28","author":"H Li","year":"2020","unstructured":"Li H, Huang G, Cai D et al (2020) Neural machine translation with noisy lexical constraints. IEEE\/ACM Trans Audio, Speech, and Lang Process 28:1864\u20131874","journal-title":"IEEE\/ACM Trans Audio, Speech, and Lang Process"},{"key":"17479_CR29","doi-asserted-by":"crossref","unstructured":"Liu X, Wang C (2021) An empirical study on hyperparameter optimization for fine-tuning pre-trained language models. In: Proceedings of the 59th annual meeting of the association for computational linguistics and the 11th international joint conference on natural language processing, vol 1: Long Papers, pp 2286\u20132300","DOI":"10.18653\/v1\/2021.acl-long.178"},{"key":"17479_CR30","first-page":"6","volume":"5","author":"D Kinga","year":"2015","unstructured":"Kinga D, Adam JB (2015) A method for stochastic optimization. Int Conf Learning Representations (ICLR) 5:6","journal-title":"Int Conf Learning Representations (ICLR)"},{"key":"17479_CR31","unstructured":"Gehring J, Auli M, Grangier D et al (2017) Convolutional sequence to sequence learning. In: Proceedings of the 34th international conference on machine learning, vol 70, pp\u00a01243\u20131252"},{"key":"17479_CR32","unstructured":"Wu F, Fan A, Baevski A et al (2018) Pay less attention with lightweight and dynamic convolutions. In: International conference on learning representations"},{"key":"17479_CR33","unstructured":"Sutskever I, Vinyals O, Le QV (2014) Sequence to sequence learning with neural networks. In: Proceedings of the 27th international conference on neural information processing systems, vol 2, pp 3104\u20133112"},{"key":"17479_CR34","unstructured":"Ghorbani B, Firat O, Freitag M et al (2021) Scaling laws for neural machine translation. In: International conference on learning representations"},{"key":"17479_CR35","doi-asserted-by":"crossref","unstructured":"Wei X, Yu H, Hu Y et al (2022) Learning to generalize to more: continuous semantic augmentation for neural machine translation. In: Proceedings of the 60th annual meeting of the association for computational linguistics, vol 1: Long Papers, pp 7930\u20137944","DOI":"10.18653\/v1\/2022.acl-long.546"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17479-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-17479-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17479-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T11:31:24Z","timestamp":1715081484000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-17479-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,2]]},"references-count":35,"journal-issue":{"issue":"16","published-online":{"date-parts":[[2024,5]]}},"alternative-id":["17479"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-17479-z","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,2]]},"assertion":[{"value":"6 June 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 August 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 October 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 November 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}