{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T14:57:14Z","timestamp":1777129034371,"version":"3.51.4"},"reference-count":20,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,19]]},"DOI":"10.1109\/slt48900.2021.9383496","type":"proceedings-article","created":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T20:46:54Z","timestamp":1616705214000},"page":"958-965","source":"Crossref","is-referenced-by-count":42,"title":["Transformer-Based Direct Speech-To-Speech Translation with Transcoder"],"prefix":"10.1109","author":[{"given":"Takatomo","family":"Kano","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sakriani","family":"Sakti","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satoshi","family":"Nakamura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"3rd International Conference on Learning Representations ICLR 2015 Conference Track Proceedings"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21236\/ADA461156"},{"key":"ref12","first-page":"4006","article-title":"Tacotron: Towards end-to-end speech synthesis","author":"wang","year":"2017","journal-title":"INTERSPEECH 2017 - 18th Annual Conference of the International Speech Communication Association"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2986886"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-503"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1145"},{"key":"ref16","article-title":"Creating corpora for speech-to-speech translation","author":"kikui","year":"2003","journal-title":"EUROSPEECH 2003 - INTERSPEECH 2003 8th European Conference on Speech Communication and Technology"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.878262"},{"key":"ref18","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proceedings of the 40th Annual Meeting on Association for Computational Linguistics  - ACL '02"},{"key":"ref19","first-page":"85","article-title":"Meteor 1.3: Automatic metric for reliable optimization and evaluation of machine translation systems","author":"denkowski","year":"2011","journal-title":"Proceedings of the Sixth Workshop on Statistical Machine Translation WMT"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1951"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461690"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00270"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-944"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-demos.34"},{"key":"ref7","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems 30 Annual Conference on Neural Information Processing Systems"},{"key":"ref2","article-title":"Listen and translate: A proof of concept for end-to-end speech-to-text translation","author":"b\u00e9rard","year":"2016","journal-title":"CoRR"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.860774"},{"key":"ref9","first-page":"577","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"2015","journal-title":"Advances in Neural Information Processing Systems 28 Annual Conference on Neural Information Processing Systems 2015"},{"key":"ref20","first-page":"22","article-title":"Using spoken word posterior features in neural machine translation","volume":"21","author":"osamura","year":"2018","journal-title":"Proceedings of the 15th International Conference on Spoken Language Translation IWSLT"}],"event":{"name":"2021 IEEE Spoken Language Technology Workshop (SLT)","location":"Shenzhen, China","start":{"date-parts":[[2021,1,19]]},"end":{"date-parts":[[2021,1,22]]}},"container-title":["2021 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9383468\/9383452\/09383496.pdf?arnumber=9383496","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,4]],"date-time":"2021-05-04T21:59:52Z","timestamp":1620165592000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9383496\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,19]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/slt48900.2021.9383496","relation":{},"subject":[],"published":{"date-parts":[[2021,1,19]]}}}