{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:53:52Z","timestamp":1761897232197,"version":"3.28.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1109\/icassp40776.2020.9054722","type":"proceedings-article","created":{"date-parts":[[2020,4,9]],"date-time":"2020-04-09T16:21:13Z","timestamp":1586449273000},"page":"7614-7618","source":"Crossref","is-referenced-by-count":13,"title":["End-to-End Code-Switching TTS with Cross-Lingual Language Model"],"prefix":"10.1109","author":[{"given":"Xuehao","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Xiaohai","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Grandee","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Rohan Kumar","family":"Das","sequence":"additional","affiliation":[]},{"given":"Haizhou","family":"Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"Libritts: A corpus derived from lib- rispeech for text-to-speech","author":"zen","year":"2019","journal-title":"arXiv 1904 02882"},{"key":"ref32","article-title":"Thchs-30: A free chinese speech corpus","author":"wang","year":"2015","journal-title":"arXiv 1512 01882"},{"journal-title":"Duelling languages Grammatical structure in codeswitching","year":"1997","author":"myers-scotton","key":"ref31"},{"article-title":"Opensubtitles2016: Extracting large parallel corpora from movie and tv subtitles","year":"2016","author":"lison","key":"ref30"},{"article-title":"1534-1,&#x201D;method for the subjective assessment of intermediate sound quality (mushra)","year":"2001","author":"recommendation","key":"ref36"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1121\/1.4954653"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682927"},{"key":"ref11","article-title":"Tacotron: Towards end-to-end speech synthesis","author":"wang","year":"2017","journal-title":"arXiv 1703 10135"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3191"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2668"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683862"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3177"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref17","first-page":"2962","article-title":"Deep voice 2: Multi-speaker neural text-to-speech","author":"gibiansky","year":"2017","journal-title":"NIPS"},{"key":"ref18","first-page":"1001910029","article-title":"Neural voice cloning with afew samples","author":"arik","year":"2018","journal-title":"NIPS"},{"key":"ref19","article-title":"Fitting new speakers based on a short untranscribed sample","author":"nachmani","year":"2018","journal-title":"arXiv 1802 06984"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9004035"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2016-13"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683678"},{"key":"ref3","first-page":"3422","article-title":"Speech synthesis of code-mixed text","author":"sitaram","year":"2016","journal-title":"LREC"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1259"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1382"},{"key":"ref5","first-page":"i","article-title":"Microsoft mulan-a bilingual tts system","volume":"1","author":"chu","year":"2003","journal-title":"IEEE ICASSP"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2005.1415035"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.21437\/Eurospeech.1999-203","article-title":"From multilingual to polyglot speech synthesis","author":"traber","year":"1999","journal-title":"Eurospeech"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2015708"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TENCON.2013.6719019"},{"key":"ref1","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-287","article-title":"Turning a monolingual speaker into multilingual for a mixed-language tts","author":"he","year":"2012","journal-title":"InterSpeech"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853591"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref21","first-page":"4480","article-title":"Transfer learning from speaker verification to multispeaker text- to-speech synthesis","author":"jia","year":"2018","journal-title":"NIPS"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461829"},{"key":"ref23","article-title":"Wavenet: A generative model for raw audio","author":"den oord","year":"2016","journal-title":"arXiv 1609 03499"},{"key":"ref26","article-title":"A robust selflearning method for fully unsupervised cross-lingual mappings of word embeddings","author":"artetxe","year":"2018","journal-title":"arXiv 1805 06297"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"}],"event":{"name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2020,5,4]]},"location":"Barcelona, Spain","end":{"date-parts":[[2020,5,8]]}},"container-title":["ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9040208\/9052899\/09054722.pdf?arnumber=9054722","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,29]],"date-time":"2023-09-29T15:29:37Z","timestamp":1696001377000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9054722\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/icassp40776.2020.9054722","relation":{},"subject":[],"published":{"date-parts":[[2020,5]]}}}