{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T13:51:27Z","timestamp":1767707487390,"version":"3.37.3"},"reference-count":32,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"JSPS KAKENHI","award":["JP17H06101","JP17K00237"],"award-info":[{"award-number":["JP17H06101","JP17K00237"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/taslp.2020.2986886","type":"journal-article","created":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T19:56:47Z","timestamp":1587412607000},"page":"1342-1355","source":"Crossref","is-referenced-by-count":33,"title":["End-to-End Speech Translation With Transcoding by Multi-Task Learning for Distant Language Pairs"],"prefix":"10.1109","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9693-3785","authenticated-orcid":false,"given":"Takatomo","family":"Kano","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5509-8963","authenticated-orcid":false,"given":"Sakriani","family":"Sakti","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6956-3803","authenticated-orcid":false,"given":"Satoshi","family":"Nakamura","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref32","first-page":"1979","article-title":"Optimizing for sentence-level BLEU+1 yields short translations","author":"nakov","year":"0","journal-title":"Proc COLING 24th Int Conf Comput Linguistics Techn Papers"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref30","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"0","journal-title":"Proc 33rd Int Conf Learn Represent"},{"key":"ref10","article-title":"Listen and translate: A proof of concept for end-to-end speech-to-text translation","author":"berard","year":"2016","journal-title":"arXiv 1612 01744"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461690"},{"key":"ref12","first-page":"2625","article-title":"Sequence-to-sequence models can directly translate foreign speech","author":"weiss","year":"0","journal-title":"Proc INTERSPEECH 14th Annu Conf Int Speech Commun Assoc"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/E17-2076"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1177\/105971239400300102"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2563981"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1044"},{"key":"ref18","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1660081"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.878262"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-898"},{"key":"ref6","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"0","journal-title":"Proc 3rd Int Conf Learn Representations"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref5","first-page":"577","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"0","journal-title":"Proc Adv Neural Inf Process Syst Annu Conf Neural Inf Process Syst"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1109"},{"key":"ref7","first-page":"4006","article-title":"Tacotron: Towards end-to-end speech synthesis","author":"wang","year":"0","journal-title":"Proc INTERSPEECH 14th Annu Conf Int Speech Commun Assoc"},{"key":"ref2","first-page":"2614","article-title":"Generalizing continuous-space translation of paralinguistic information","author":"kano","year":"0","journal-title":"Proc INTERSPEECH 14th Annu Conf Int Speech Commun Assoc"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.3115\/1557769.1557821"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.860774"},{"key":"ref20","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"chung","year":"2014","journal-title":"Proc Neural Inf Process Syst"},{"key":"ref22","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"0","journal-title":"Proc Adv Neural Inf Process Syst Annu Conf Neural Inf Process Syst"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"article-title":"Layer normalization","year":"2016","author":"ba","key":"ref23"},{"key":"ref26","first-page":"381","article-title":"Creating corpora for speech-to-speech translation","author":"kikui","year":"0","journal-title":"Proc 8th Eur Conf Speech Commun Technol EUROSPEECH INTERSPEECH"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.21236\/ADA461156"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/8938144\/09072502.pdf?arnumber=9072502","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T17:31:20Z","timestamp":1651080680000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9072502\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/taslp.2020.2986886","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"type":"print","value":"2329-9290"},{"type":"electronic","value":"2329-9304"}],"subject":[],"published":{"date-parts":[[2020]]}}}