{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T14:57:15Z","timestamp":1777129035375,"version":"3.51.4"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/asru46091.2019.9003853","type":"proceedings-article","created":{"date-parts":[[2020,2,21]],"date-time":"2020-02-21T07:01:33Z","timestamp":1582268493000},"page":"593-600","source":"Crossref","is-referenced-by-count":26,"title":["Speech-to-Speech Translation Between Untranscribed Unknown Languages"],"prefix":"10.1109","author":[{"given":"Andros","family":"Tjandra","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sakriani","family":"Sakti","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satoshi","family":"Nakamura","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","author":"brian","year":"2015","journal-title":"librosa Audio and music signal analysis in python"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683480"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1558"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268950"},{"key":"ref36","first-page":"176","article-title":"Better hypothesis testing for statistical machine translation: Controlling for optimizer instability","author":"clark","year":"2011","journal-title":"Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics Human Language Technologies Short Papers-Volume 2 Association for Computational Linguistics"},{"key":"ref35","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","author":"banerjee","year":"2005","journal-title":"Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization"},{"key":"ref34","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proceedings of the 40th Annual Meeting on Association for Computational Linguistics Association for Computational Linguistics"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268953"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2904"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2048"},{"key":"ref13","author":"cho","year":"2019","journal-title":"VQVAE with speaker adversarial training"},{"key":"ref14","first-page":"11449","article-title":"VQVAE unsupervised unit discovery and multi -scale code2spec inverter for ze-rospeech challenge 2019","volume":"abs 1905","author":"tjandra","year":"2019","journal-title":"CoRR"},{"key":"ref15","first-page":"6306","article-title":"Neural discrete representation learning","author":"van den oord","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390294"},{"key":"ref17","author":"kingma","year":"2014","journal-title":"Adam A method for stochastic optimization"},{"key":"ref18","first-page":"2395","article-title":"Fast decoding in sequence models using discrete latent variables","author":"kaiser","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref19","article-title":"Towards a better understanding of vector quantized autoencoders","author":"roy","year":"2018","journal-title":"openreview"},{"key":"ref28","article-title":"Creating corpora for speech-to-speech translation","author":"kikui","year":"2003","journal-title":"Eighth European Conference on Speech Communication and Technology"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1109"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref3","author":"wang","year":"2017","journal-title":"Tacotron Towards end-to-end speech synthesis"},{"key":"ref6","first-page":"2625","article-title":"Sequence-to-sequence models can directly translate foreign speech","author":"weiss","year":"2017","journal-title":"INTERSPEECH 2017 - 18th Annual Conference of the International Speech Communication Association"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.878262"},{"key":"ref5","article-title":"Listen and translate: A proof of concept for end-to-end speech-to-text translation","volume":"abs 1612 1744","author":"berard","year":"2016","journal-title":"CoRR"},{"key":"ref8","author":"ye","year":"2019","journal-title":"Direct speech-to-speech translation with a sequence-to-sequence model"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-944"},{"key":"ref2","article-title":"Neural machine translation by jointly learning to align and translate","volume":"abs 1409 473","author":"bahdanau","year":"2014","journal-title":"CoRR"},{"key":"ref9","first-page":"3169","article-title":"The zero resource speech challenge 2015","author":"versteegh","year":"2015","journal-title":"Sixteenth Annual Conference of the International Speech Communication Association"},{"key":"ref1","first-page":"577","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"2015","journal-title":"Advances in Neural Information Processing Systems 28 Annual Conference on Neural Information Processing Systems 2015"},{"key":"ref20","author":"bahdanau","year":"2014","journal-title":"Neural machine translation by jointly learning to align and translate"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref21","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639528"},{"key":"ref26","author":"ioffe","year":"2015","journal-title":"Batch Normalization Accelerating Deep Network Training by Reducing Internal Covariate Shift"},{"key":"ref25","author":"xu","year":"2015","journal-title":"Empirical evaluation of rectified activations in convolutional network"}],"event":{"name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"SG, Singapore","start":{"date-parts":[[2019,12,14]]},"end":{"date-parts":[[2019,12,18]]}},"container-title":["2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8985378\/9003727\/09003853.pdf?arnumber=9003853","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T14:51:19Z","timestamp":1658155879000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9003853\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/asru46091.2019.9003853","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}