{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T18:10:47Z","timestamp":1775326247358,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/asru46091.2019.9003926","type":"proceedings-article","created":{"date-parts":[[2020,2,21]],"date-time":"2020-02-21T02:01:33Z","timestamp":1582250493000},"page":"964-971","source":"Crossref","is-referenced-by-count":5,"title":["Zero-Shot Code-Switching ASR and TTS with Multilingual Machine Speech Chain"],"prefix":"10.1109","author":[{"given":"Sahoko","family":"Nakayama","sequence":"first","affiliation":[{"name":"RIKEN, Center for Advanced Intelligence Project AIP,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andros","family":"Tjandra","sequence":"additional","affiliation":[{"name":"Nara Institute of Science and Technology,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sakriani","family":"Sakti","sequence":"additional","affiliation":[{"name":"RIKEN, Center for Advanced Intelligence Project AIP,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Satoshi","family":"Nakamura","sequence":"additional","affiliation":[{"name":"RIKEN, Center for Advanced Intelligence Project AIP,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref38","doi-asserted-by":"crossref","first-page":"339","DOI":"10.1162\/tacl_a_00065","article-title":"Google's multilingual neural machine translation system: Enabling zero-shot translation","volume":"5","author":"melvin","year":"2017","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"ref33","author":"luong","year":"2015","journal-title":"Effective Approaches to Attention-based Neural Machine Translation"},{"key":"ref32","author":"mcfee","year":"2017","journal-title":"librosa 0 5 0"},{"key":"ref31","author":"durette","year":"0","journal-title":"Google Text to Speech"},{"key":"ref30","author":"huang","year":"0","journal-title":"pypinyin - pinyin library in python"},{"key":"ref37","author":"yongqin","year":"2017","journal-title":"Zero-shot learning the good the bad and the ugly"},{"key":"ref36","first-page":"3","article-title":"Zero-data learning of new tasks","volume":"1","author":"larochelle","year":"2008","journal-title":"Proc Of AAAI"},{"key":"ref35","author":"paszke","year":"2017","journal-title":"Au-tomatic differentiation in pytorch"},{"key":"ref34","author":"xu","year":"2015","journal-title":"Empirical evaluation of rectified activations in convolutional network"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2016-13"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1244"},{"key":"ref12","first-page":"2691","article-title":"An investigation of acoustic models for multilingual code switching","author":"white","year":"2008","journal-title":"Proc of Inter-speech"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947482"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462180"},{"key":"ref15","author":"guo","year":"2018","journal-title":"Study of semi-supervised approaches to improving english-mandarin code-switching speech recognition"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639674"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268950"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1558"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683480"},{"key":"ref28","first-page":"529","article-title":"Pointwise prediction for robust, adaptable japanese morphological analysis","author":"neubig","year":"2011","journal-title":"Proc of the 49th Annual Meeting of the Association for Computational Linguistics Human Language Technologies short papers-Volume 2 Association for Computational Linguistics"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682850"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICSDA.2018.8693044"},{"key":"ref3","author":"luo","year":"2018","journal-title":"Towards end-to-end code-switching speech recognition"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.04.044"},{"key":"ref29","author":"miura","year":"0","journal-title":"pykakasi - kakasi library in python"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IALP.2012.28"},{"key":"ref8","first-page":"137","article-title":"Mi-crosoft Mulan-a bilingual TTS system","author":"liang","year":"2007","journal-title":"Proc 6th ISCA Workshop Speech Synth (SSW6)"},{"key":"ref7","first-page":"264","article-title":"Microsoft Mulan-a bilingual TTS system","author":"chu","year":"2003","journal-title":"Proc of ICASSP"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6289015"},{"key":"ref9","first-page":"3422","article-title":"Speech synthesis of code-mixed text","author":"sitaram","year":"2016","journal-title":"Proc of LREC"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511620867"},{"key":"ref20","article-title":"The Speech Chain: The Physics And Biology Of Spoken Language","author":"denes","year":"1993","journal-title":"Anchor Books"},{"key":"ref22","first-page":"4960","article-title":"Listen, attend and spell: a neural network for large vocabulary conversational speech recognition","author":"william","year":"2016","journal-title":"Proc of ICASSP"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref24","author":"li","year":"2017","journal-title":"Deep speaker an end-to-end neural speaker embedding system"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"4006","DOI":"10.21437\/Interspeech.2017-1452","article-title":"Tacotron: A fully end-to-end text-to-speech synthesis model","author":"wang","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref26","doi-asserted-by":"crossref","first-page":"381","DOI":"10.21437\/Eurospeech.2003-150","article-title":"Creating corpora for speech-to-speech translation","author":"kikui","year":"2003","journal-title":"Proc of Eurospeech"},{"key":"ref25","first-page":"303","article-title":"Multilingual spoken language corpus development for communication research","volume":"12","author":"takezawa","year":"2007","journal-title":"Proc of the Association for Computational Linguistics and Chinese Language Processing"}],"event":{"name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Singapore","start":{"date-parts":[[2019,12,14]]},"end":{"date-parts":[[2019,12,18]]}},"container-title":["2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8985378\/9003727\/09003926.pdf?arnumber=9003926","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T19:25:52Z","timestamp":1756754752000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9003926\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/asru46091.2019.9003926","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}