{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T21:38:22Z","timestamp":1771018702092,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1-2","license":[{"start":{"date-parts":[[2017,5,23]],"date-time":"2017-05-23T00:00:00Z","timestamp":1495497600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100006321","name":"National Institutes of Natural Sciences","doi-asserted-by":"publisher","award":["KAKENHI 16H05872"],"award-info":[{"award-number":["KAKENHI 16H05872"]}],"id":[{"id":"10.13039\/501100006321","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001695","name":"Japan Science and Technology Corporation","doi-asserted-by":"publisher","award":["CREST"],"award-info":[{"award-number":["CREST"]}],"id":[{"id":"10.13039\/501100001695","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Translation"],"published-print":{"date-parts":[[2017,6]]},"DOI":"10.1007\/s10590-017-9197-z","type":"journal-article","created":{"date-parts":[[2017,5,23]],"date-time":"2017-05-23T11:29:40Z","timestamp":1495538980000},"page":"49-64","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":30,"title":["Zero-resource machine translation by multimodal encoder\u2013decoder network with multimedia pivot"],"prefix":"10.1007","volume":"31","author":[{"given":"Hideki","family":"Nakayama","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Noriki","family":"Nishida","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,5,23]]},"reference":[{"key":"9197_CR1","unstructured":"Bergsma S, Van Durme B (2011) Learning bilingual lexicons using the visual similarity of labeled web images. In: Proc. IJCAI, pp 1764\u20131769"},{"key":"9197_CR2","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) ImageNet: a Large-scale Hierarchical Image Database. In: Proc. IEEE CVPR","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"9197_CR3","doi-asserted-by":"crossref","unstructured":"Elliott D, Frank S, Sima\u2019an K, Specia L (2016) Multi30K: multilingual English\u2013German image descriptions. In: Proceedings of the 5th ACL Workshop on Vision and Language, pp 70\u201374","DOI":"10.18653\/v1\/W16-3210"},{"key":"9197_CR4","doi-asserted-by":"crossref","unstructured":"Firat O, Sankaran B, Al-Onaizan Y, Vural FTY, Cho K (2016) Zero-resource translation with multi-lingual neural machine translation. In: Proc. EMNLP, pp 268\u2013277","DOI":"10.18653\/v1\/D16-1026"},{"key":"9197_CR5","unstructured":"Frome A, Corrado G, Shlens J (2013) Devise: a deep visual-semantic embedding model. In: Proc. NIPS, pp 1\u201311"},{"key":"9197_CR6","doi-asserted-by":"crossref","unstructured":"Funaki R, Nakayama H (2015) Image-mediated learning for zero-shot cross-lingual document retrieval. In: Proc. EMNLP, pp 585\u2013590","DOI":"10.18653\/v1\/D15-1070"},{"key":"9197_CR7","unstructured":"Gr\u00fcbinger M, Clough P, M\u00fcller H, Deselaers T (2006) The IAPR TC-12 benchmark: a new evaluation resource for visual information systems. In: Proc. LREC, pp 13\u201323"},{"issue":"12","key":"9197_CR8","doi-asserted-by":"crossref","first-page":"2639","DOI":"10.1162\/0899766042321814","volume":"16","author":"DR Hardoon","year":"2004","unstructured":"Hardoon DR, Szedmak S, Shawe-taylor J (2004) Canonical correlation analysis: an overview with application to learning methods. Neural Comput 16(12):2639\u20132664","journal-title":"Neural Comput"},{"key":"9197_CR9","doi-asserted-by":"crossref","unstructured":"Hitschler J, Riezler S (2016) Multimodal pivots for image caption translation. In: Proc. ACL, pp 2399\u20132409","DOI":"10.18653\/v1\/P16-1227"},{"issue":"8","key":"9197_CR10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1162\/neco.1997.9.1.1","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1\u201332","journal-title":"Neural Comput"},{"key":"9197_CR11","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1093\/biomet\/28.3-4.321","volume":"28","author":"H Hotelling","year":"1936","unstructured":"Hotelling H (1936) Relations between two sets of variants. Biometrika 28:321\u2013377","journal-title":"Biometrika"},{"key":"9197_CR12","doi-asserted-by":"crossref","unstructured":"Huang PY, Liu F, Shiang SR, Oh J, Dyer C (2016) Attention-based multimodal neural machine translation. In: Proc. the first conference on machine translation (WMT), vol\u00a02, pp 639\u2013645","DOI":"10.18653\/v1\/W16-2360"},{"key":"9197_CR13","doi-asserted-by":"crossref","unstructured":"Jia Y, Shelhamer E, Donahue J, Karayev S, Long J, Girshick R, Guadarrama S, Darrell T (2014) Caffe : convolutional architecture for fast feature embedding. In: ACM conference on multimedia, pp 675\u2013678","DOI":"10.1145\/2647868.2654889"},{"key":"9197_CR14","doi-asserted-by":"crossref","unstructured":"Johnson J, Karpathy A, Fei-Fei L (2016) DenseCap: fully convolutional localization networks for dense captioning. In: Proc. IEEE CVPR","DOI":"10.1109\/CVPR.2016.494"},{"key":"9197_CR15","doi-asserted-by":"crossref","unstructured":"Kiela D, Vulic I, Clark S (2015) Visual bilingual lexicon induction with transferred ConvNet features. In: Proc. EMNLP, pp 148\u2013158","DOI":"10.18653\/v1\/D15-1015"},{"key":"9197_CR16","unstructured":"Kingma DP, Ba JL (2015) Adam: a method for stochastic optimization. In: Proc. ICLR"},{"key":"9197_CR17","unstructured":"Kiros R, Salakhutdinov R, Zemel RS (2015) Unifying visual-semantic embeddings with multimodal neural language models. Transactions of the Association for Computational Linguistics (TACL)"},{"key":"9197_CR18","first-page":"79","volume":"11","author":"P Koehn","year":"2005","unstructured":"Koehn P (2005) Europarl: a parallel corpus for statistical machine translation. Proc Mach Transl Summit 11:79\u201386","journal-title":"Proc Mach Transl Summit"},{"key":"9197_CR19","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511815829","volume-title":"Statistical machine translation","author":"P Koehn","year":"2009","unstructured":"Koehn P (2009) Statistical machine translation. Cambridge University Press, Cambridge"},{"key":"9197_CR20","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) ImageNet classification with deep convolutional neural networks. In: Proc. NIPS, pp 1097\u20131105"},{"key":"9197_CR21","doi-asserted-by":"crossref","unstructured":"Lin CY, Och FJ (2004) Orange: a method for evaluating automatic evaluation metrics for machine translation. In: Proc. COLING, pp 501\u2013507","DOI":"10.3115\/1220355.1220427"},{"key":"9197_CR22","unstructured":"Luong MT, Le QV, Sutskever I, Vinyals O, Kaiser L (2016) Multi-task sequence to sequence learning. In: Proc. ICLR"},{"key":"9197_CR23","unstructured":"Oard D (1999) Issues in cross-language retrieval from document image collections. In: Proceedings of symposium on document image understanding technology, pp 229\u2013234"},{"key":"9197_CR24","unstructured":"Papineni K, Roukos S, Ward T, Zhu Wj (2002) BLEU : a method for automatic evaluation of machine translation. In: Proc. ACL, pp 311\u2013318"},{"key":"9197_CR25","doi-asserted-by":"crossref","unstructured":"Rajendran J, Khapra MM, Chandar S, Ravindran B (2016) Bridge correlational neural networks for multilingual multimodal representation learning. In: Proc. NAACL-HLT, pp 171\u2013181","DOI":"10.18653\/v1\/N16-1021"},{"key":"9197_CR26","unstructured":"Riesa J, Marcu D (2012) Automatic parallel fragment extraction from noisy data. In: Proc. NAACL, pp 538\u2013542"},{"key":"9197_CR27","unstructured":"Saha A, Khapra MM, Chandar S, Rajendran J, Cho K (2016) A correlational encoder decoder architecture for pivot based sequence generation. In: Proc. COLING"},{"key":"9197_CR28","doi-asserted-by":"crossref","unstructured":"Shen S, Cheng Y, He Z, He W, Wu H, Sun M, Liu Y (2016) Minimum risk training for neural machine translation. In: Proc. ACL, pp 1683\u20131692","DOI":"10.18653\/v1\/P16-1159"},{"key":"9197_CR29","doi-asserted-by":"crossref","unstructured":"Silberer C, Lapata M (2014) Learning grounded meaning representations with autoencoders. In: Proc. ACL, pp 721\u2013732","DOI":"10.3115\/v1\/P14-1068"},{"key":"9197_CR30","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recoginition. In: Proc. ICLR"},{"key":"9197_CR31","unstructured":"Sutskever I, Vinyals O, Le QV (2014) Sequence to sequence learning with neural networks. In: Proc. NIPS, pp 3104\u20133112"},{"key":"9197_CR32","unstructured":"Taeger W (2011) The sentence-aligned European Patent Corpus. In: Proc. EAMT, pp 177\u2013184"},{"key":"9197_CR33","unstructured":"Udupa R, Khapra MM (2010) Improving the multilingual user experience of wikipedia using cross-language name search. In: Proc. NAACL, pp 492\u2013500"},{"key":"9197_CR34","unstructured":"Uszkoreit J, Ponte J, Popat AC, Dubiner M (2010) Large scale parallel document mining for machine translation. In: Proc. COLING, pp 1101\u20131109"},{"key":"9197_CR35","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2015) Show and tell : a neural image caption generator. In: Proc. IEEE CVPR","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"9197_CR36","doi-asserted-by":"crossref","unstructured":"Vuli I, Kiela D, Clark S, Moens MF (2016) Multi-modal representations for improved bilingual lexicon learning. In: Proc. ACL, pp 188\u2013194","DOI":"10.18653\/v1\/P16-2031"},{"issue":"3","key":"9197_CR37","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1007\/s10590-008-9041-6","volume":"21","author":"H Wu","year":"2007","unstructured":"Wu H, Wang H (2007) Pivot language approach for phrase-based statistical machine translation. Mach Transl 21(3):165\u2013181","journal-title":"Mach Transl"},{"key":"9197_CR38","doi-asserted-by":"crossref","unstructured":"Wu H, Wang H (2009) Revisiting pivot language approach for machine translation. In: Proc. IJCNLP-ACL, pp 154\u2013162","DOI":"10.3115\/1687878.1687902"},{"key":"9197_CR39","first-page":"67","volume":"2","author":"P Young","year":"2014","unstructured":"Young P, Lai A, Hodosh M, Hockenmaier J (2014) From Image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. Trans ACL 2:67\u201378","journal-title":"Trans ACL"}],"container-title":["Machine Translation"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10590-017-9197-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10590-017-9197-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10590-017-9197-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,24]],"date-time":"2019-09-24T19:56:21Z","timestamp":1569354981000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10590-017-9197-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5,23]]},"references-count":39,"journal-issue":{"issue":"1-2","published-print":{"date-parts":[[2017,6]]}},"alternative-id":["9197"],"URL":"https:\/\/doi.org\/10.1007\/s10590-017-9197-z","relation":{},"ISSN":["0922-6567","1573-0573"],"issn-type":[{"value":"0922-6567","type":"print"},{"value":"1573-0573","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,5,23]]}}}