{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T08:00:15Z","timestamp":1761897615966,"version":"3.37.3"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R&D Program of China","doi-asserted-by":"publisher","award":["2018YFB1305200"],"award-info":[{"award-number":["2018YFB1305200"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61771333"],"award-info":[{"award-number":["61771333"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,13]]},"DOI":"10.1109\/asru51503.2021.9687961","type":"proceedings-article","created":{"date-parts":[[2022,2,3]],"date-time":"2022-02-03T20:31:00Z","timestamp":1643920260000},"page":"602-609","source":"Crossref","is-referenced-by-count":1,"title":["Learning Language and Speaker Information for Code-Switch Speech Synthesis with Limited Data"],"prefix":"10.1109","author":[{"given":"Mengxin","family":"Chai","sequence":"first","affiliation":[{"name":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaotong","family":"Guo","sequence":"additional","affiliation":[{"name":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cheng","family":"Gong","sequence":"additional","affiliation":[{"name":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Longbiao","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianwu","family":"Dang","sequence":"additional","affiliation":[{"name":"College of Intelligence and Computing, Tianjin University,Tianjin Key Laboratory of Cognitive Computing and Application,Tianjin,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ju","family":"Zhang","sequence":"additional","affiliation":[{"name":"Huiyan Technology (Tianjin) Co., Ltd.,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2070"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683519"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1235"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/S1007-0214(09)70124-5"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2001.0169"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-63830-6_4"},{"key":"ref37","first-page":"2579","article-title":"Visualizing data using t-sne","volume":"9","author":"van der maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref36","first-page":"3422","article-title":"Speech synthesis of code-mixed text","author":"sitaram","year":"2016","journal-title":"Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16)"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-852"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682804"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472732"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/634067.634256"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682927"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054722"},{"key":"ref13","article-title":"Dynamic soft windowing and language dependent style token for code-switching end-to-end speech synthesis","author":"ruibo","year":"0","journal-title":"InterSpeech"},{"key":"ref14","article-title":"Deep voice 3: 2000-speaker neural text-to-speech","volume":"abs 1710 7654","author":"ping","year":"2017","journal-title":"CoRR"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"journal-title":"FastSpeech Fast Robust and Controllable Text to Speech","year":"2019","author":"ren","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2015708"},{"key":"ref18","first-page":"3422","article-title":"Speech synthesis of code-mixed text","author":"sitaram","year":"2016","journal-title":"Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2016-13"},{"key":"ref28","article-title":"Speech synthesis for mixed-language navigation instructions","author":"chandu","year":"0","journal-title":"Interspeech 2017 2017 ISCA Edition Interspeech 2017"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2015708"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054722"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CHINSL.2008.ECP.15"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TENCON.2013.6719019"},{"key":"ref29","first-page":"76","article-title":"Experiments with cross-lingual systems for synthesis of code-mixed text","author":"sitaram","year":"0","journal-title":"ISCA Speech Synthesis Workshop"},{"key":"ref5","article-title":"Turning a monolingual speaker into multilingual for a mixed-language tts","author":"ji","year":"0","journal-title":"InterSpeech"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472737"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2187195"},{"key":"ref2","article-title":"An hmm-based bilingual (mandarin-english) tts","author":"liang","year":"0","journal-title":"SSW"},{"key":"ref9","article-title":"Multi-language multi-speaker acoustic modeling for lstm-rnn based statistical parametric speech synthesis","author":"bo","year":"0","journal-title":"Interspeech 2016"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2005.1415035"},{"key":"ref20","first-page":"i","article-title":"Microsoft mulan - a bilingual tts system","volume":"1","author":"chu","year":"0","journal-title":"2003 IEEE International Conference on Acoustics Speech and Signal Processing 2003 Proceedings (ICASSP'03)"},{"key":"ref22","article-title":"From multilingual to polyglot speech synthesis","author":"christof","year":"0","journal-title":"Eurospeech"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1259"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-172"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2679"},{"key":"ref23","first-page":"i\/1","article-title":"Polyglot synthesis using a mixture of monolingual corpora","volume":"1","author":"javier","year":"0","journal-title":"Proceedings (ICASSP '05) IEEE International Conference on Acoustics Speech and Signal Processing 2005"},{"journal-title":"Learning pronunciation from a foreign language in speech synthesis networks","year":"2020","author":"lee","key":"ref26"},{"key":"ref25","first-page":"5621","article-title":"Bytes are all you need: End-to-end multilingual speech recognition and synthesis with bytes","author":"bo","year":"0","journal-title":"ICASSP 2019 &#x2014; 2019 IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP)"}],"event":{"name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2021,12,13]]},"location":"Cartagena, Colombia","end":{"date-parts":[[2021,12,17]]}},"container-title":["2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9687821\/9687855\/09687961.pdf?arnumber=9687961","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:58:24Z","timestamp":1652201904000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9687961\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,13]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/asru51503.2021.9687961","relation":{},"subject":[],"published":{"date-parts":[[2021,12,13]]}}}