{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T17:45:41Z","timestamp":1778262341832,"version":"3.51.4"},"reference-count":35,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2017,9,1]],"date-time":"2017-09-01T00:00:00Z","timestamp":1504224000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Speech &amp; Language"],"published-print":{"date-parts":[[2017,9]]},"DOI":"10.1016\/j.csl.2017.01.014","type":"journal-article","created":{"date-parts":[[2017,3,15]],"date-time":"2017-03-15T14:31:52Z","timestamp":1489588312000},"page":"137-148","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":46,"special_numbering":"C","title":["On integrating a language model into neural machine translation"],"prefix":"10.1016","volume":"45","author":[{"given":"Caglar","family":"Gulcehre","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Orhan","family":"Firat","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kelvin","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kyunghyun","family":"Cho","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yoshua","family":"Bengio","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.csl.2017.01.014_bib0001","unstructured":"Bahdanau, D., Cho, K., Bengio, Y., 2014. Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473."},{"key":"10.1016\/j.csl.2017.01.014_bib0002","unstructured":"Bastien, F., Lamblin, P., Pascanu, R., Bergstra, J., Goodfellow, I., Bergeron, A., Bouchard, N., Warde-Farley, D., Bengio, Y., 2012. Theano: new features and speed improvements. Submited to the Deep Learning and Unsupervised Feature Learning NIPS 2012 Workshop."},{"key":"10.1016\/j.csl.2017.01.014_bib0003","series-title":"Proceedings of the Python for Scientific Computing Conference (SciPy)","doi-asserted-by":"crossref","DOI":"10.25080\/Majora-92bf1922-003","article-title":"Theano: a CPU and GPU math expression compiler","author":"Bergstra","year":"2010"},{"key":"10.1016\/j.csl.2017.01.014_bib0004","series-title":"Proceedings of the 16th Conference of the European Association for Machine Translation (EAMT)","first-page":"261","article-title":"Wit3: Web inventory of transcribed and translated talks","author":"Cettolo","year":"2012"},{"key":"10.1016\/j.csl.2017.01.014_bib0005","series-title":"Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics","first-page":"263","article-title":"A hierarchical phrase-based model for statistical machine translation","author":"Chiang","year":"2005"},{"key":"10.1016\/j.csl.2017.01.014_sbref0004","series-title":"Proceedings of the Empiricial Methods in Natural Language Processing (EMNLP 2014)","article-title":"Learning phrase representations using RNN encoder-decoder for statistical machine translation","author":"Cho","year":"2014"},{"key":"10.1016\/j.csl.2017.01.014_bib0007","doi-asserted-by":"crossref","unstructured":"Chung, J., Cho, K., Bengio, Y., 2016. A character-level decoder without explicit segmentation for neural machine translation. arXiv preprint arXiv:1603.06147.","DOI":"10.18653\/v1\/P16-1160"},{"key":"10.1016\/j.csl.2017.01.014_bib0008","doi-asserted-by":"crossref","unstructured":"Firat, O., Cho, K., Bengio, Y., 2016. Multi-way, multilingual neural machine translation with a shared attention mechanism. arXiv preprint arXiv:1601.01073.","DOI":"10.18653\/v1\/N16-1101"},{"key":"10.1016\/j.csl.2017.01.014_bib0009","series-title":"Proceedings of The 30th International Conference on Machine Learning","first-page":"1319","article-title":"Maxout networks","author":"Goodfellow","year":"2013"},{"key":"10.1016\/j.csl.2017.01.014_bib0010","series-title":"Advances in Neural Information Processing Systems","first-page":"2348","article-title":"Practical variational inference for neural networks","author":"Graves","year":"2011"},{"key":"10.1016\/j.csl.2017.01.014_bib0011","unstructured":"Gulcehre, C., Firat, O., Xu, K., Cho, K., Barrault, L., Lin, H.-C., Bougares, F., Schwenk, H., Bengio, Y., 2015. On using monolingual corpora in neural machine translation. arXiv preprint arXiv:1503.03535."},{"key":"10.1016\/j.csl.2017.01.014_bib0012","unstructured":"Hinton, G. E., Srivastava, N., Krizhevsky, A., Sutskever, I., Salakhutdinov, R. R., 2012. Improving neural networks by preventing co-adaptation of feature detectors. arXiv preprint arXiv:1207.0580."},{"issue":"8","key":"10.1016\/j.csl.2017.01.014_bib0013","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput."},{"key":"10.1016\/j.csl.2017.01.014_bib0014","doi-asserted-by":"crossref","unstructured":"Jean, S., Cho, K., Memisevic, R., Bengio, Y., 2014. On using very large target vocabulary for neural machine translation. arXiv preprint arXiv:1412.2007.","DOI":"10.3115\/v1\/P15-1001"},{"key":"10.1016\/j.csl.2017.01.014_bib0015","series-title":"Proceedings of the ACL Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"1700","article-title":"Recurrent continuous translation models","author":"Kalchbrenner","year":"2013"},{"key":"10.1016\/j.csl.2017.01.014_bib0016","unstructured":"Kingma, D. P., Ba, J., 2014. Adam: a method for stochastic optimization. arXiv:1412.6980 [cs.LG]."},{"key":"10.1016\/j.csl.2017.01.014_bib0017","series-title":"Statistical Machine Translation","author":"Koehn","year":"2010"},{"key":"10.1016\/j.csl.2017.01.014_bib0018","series-title":"Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology-Volume 1","first-page":"48","article-title":"Statistical phrase-based translation","author":"Koehn","year":"2003"},{"key":"10.1016\/j.csl.2017.01.014_bib0019","unstructured":"Luong, M.-T., Le, Q. V., Sutskever, I., Vinyals, O., Kaiser, L., 2015. Multi-task sequence to sequence learning. arXiv preprint arXiv:1511.06114."},{"key":"10.1016\/j.csl.2017.01.014_bib0020","doi-asserted-by":"crossref","unstructured":"Luong, M.-T., Manning, C. D., 2016. Achieving open vocabulary neural machine translation with hybrid word-character models. arXiv preprint arXiv:1604.00788.","DOI":"10.18653\/v1\/P16-1100"},{"key":"10.1016\/j.csl.2017.01.014_bib0021","doi-asserted-by":"crossref","unstructured":"Luong, T., Sutskever, I., Le, Q. V., Vinyals, O., Zaremba, W., 2014. Addressing the rare word problem in neural machine translation. arXiv preprint arXiv:1410.8206.","DOI":"10.3115\/v1\/P15-1002"},{"key":"10.1016\/j.csl.2017.01.014_bib0022","series-title":"Proceedings of the 2011 ASRU Workshop","first-page":"196","article-title":"Rnnlm-recurrent neural network language modeling toolkit","author":"Mikolov","year":"2011"},{"key":"10.1016\/j.csl.2017.01.014_bib0023","series-title":"Using variable decoding weight for language model in statistical machine translation","author":"Mohit","year":"2010"},{"key":"10.1016\/j.csl.2017.01.014_bib0024","series-title":"Proceedings of the Second International Conference on Learning Representations (ICLR 2014)","article-title":"How to construct deep recurrent neural networks","author":"Pascanu","year":"2014"},{"key":"10.1016\/j.csl.2017.01.014_bib0025","series-title":"Proceedings of the 30th International Conference on Machine Learning (ICML 2013)","article-title":"On the difficulty of training recurrent neural networks","author":"Pascanu","year":"2013"},{"key":"10.1016\/j.csl.2017.01.014_bib0026","series-title":"Computational Linguistics and Intelligent Text Processing","first-page":"107","article-title":"Morphological disambiguation of turkish text with perceptron algorithm","author":"Sak","year":"2007"},{"issue":"11","key":"10.1016\/j.csl.2017.01.014_bib0027","doi-asserted-by":"crossref","first-page":"2673","DOI":"10.1109\/78.650093","article-title":"Bidirectional recurrent neural networks","volume":"45","author":"Schuster","year":"1997","journal-title":"IEEE Trans. Signal Process."},{"issue":"3","key":"10.1016\/j.csl.2017.01.014_bib0028","doi-asserted-by":"crossref","first-page":"492","DOI":"10.1016\/j.csl.2006.09.003","article-title":"Continuous space language models","volume":"21","author":"Schwenk","year":"2007","journal-title":"Comput. Speech Lang."},{"key":"10.1016\/j.csl.2017.01.014_bib0029","doi-asserted-by":"crossref","unstructured":"Sennrich, R., Haddow, B., Birch, A., 2015. Neural machine translation of rare words with subword units. arXiv preprint arXiv:1508.07909.","DOI":"10.18653\/v1\/P16-1162"},{"key":"10.1016\/j.csl.2017.01.014_bib0030","unstructured":"Shuyo, N., 2010. Language detection library for java."},{"key":"10.1016\/j.csl.2017.01.014_bib0031","series-title":"Advances in Neural Information Processing Systems (NIPS 2014)","article-title":"Sequence to sequence learning with neural networks","author":"Sutskever","year":"2014"},{"key":"10.1016\/j.csl.2017.01.014_bib0032","article-title":"Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude","volume":"4","author":"Tieleman","year":"2012","journal-title":"COURSERA: Neural Netw. Mach. Learn."},{"key":"10.1016\/j.csl.2017.01.014_bib0033","series-title":"Proceedings of the 10th International Workshop on Spoken Language Translation (IWSLT)","first-page":"152","article-title":"Tubitak turkish-english submissions for iwslt 2013","author":"Y\u0131lmaz","year":"2013"},{"key":"10.1016\/j.csl.2017.01.014_bib0034","unstructured":"Zeiler, M. D., 2012. ADADELTA: an adaptive learning rate method. arXiv:1212.5701 [cs.LG]."},{"key":"10.1016\/j.csl.2017.01.014_bib0035","article-title":"Transfer learning for low-resource neural machine translation","volume":"abs\/1604.02201","author":"Zoph","year":"2016","journal-title":"CoRR"}],"container-title":["Computer Speech &amp; Language"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230816301395?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230816301395?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,10,3]],"date-time":"2020-10-03T18:58:27Z","timestamp":1601751507000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0885230816301395"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9]]},"references-count":35,"alternative-id":["S0885230816301395"],"URL":"https:\/\/doi.org\/10.1016\/j.csl.2017.01.014","relation":{},"ISSN":["0885-2308"],"issn-type":[{"value":"0885-2308","type":"print"}],"subject":[],"published":{"date-parts":[[2017,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"On integrating a language model into neural machine translation","name":"articletitle","label":"Article Title"},{"value":"Computer Speech & Language","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.csl.2017.01.014","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2017 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}]}}