{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T05:03:20Z","timestamp":1748495000059},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1109\/icassp40776.2020.9053642","type":"proceedings-article","created":{"date-parts":[[2020,4,9]],"date-time":"2020-04-09T16:21:13Z","timestamp":1586449273000},"page":"7629-7633","source":"Crossref","is-referenced-by-count":5,"title":["Speaker Adaptation of a Multilingual Acoustic Model for Cross-Language Synthesis"],"prefix":"10.1109","author":[{"given":"Ivan","family":"Himawan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sandesh","family":"Aryal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Iris","family":"Ouyang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sam","family":"Kang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pierre","family":"Lanchantin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Simon","family":"King","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"journal-title":"Tech Rep","article-title":"Bs. 1534-1 method for the subjective assessment of intermediate quality level of coding systems","year":"2003","key":"ref30"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1769"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2668"},{"key":"ref12","article-title":"New approach to polyglot synthesis: How to speak any language with anyone&#x2019;s voice","author":"latorre","year":"2006","journal-title":"Proc of Multilingual Speech and Language Processing"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683519"},{"key":"ref14","first-page":"4905","article-title":"Adapting and controlling DNN-based speech synthesis using input codes","author":"luong","year":"2017","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-172"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IALP.2017.8300579"},{"key":"ref17","first-page":"5540","article-title":"Speaker and language factorization in DNN-based TTS synthesis","author":"fan","year":"2016","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339738"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2015708"},{"key":"ref28","article-title":"Voiceloop: Voice fitting and synthesis via a phonological loop","author":"taigman","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref4","article-title":"SampleRNN: An unconditional end-to-end neural audio generation model","author":"mehri","year":"2017","journal-title":"International Conference on Learning Representations"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1038"},{"article-title":"Wavenet: A generative model for raw audio","year":"2016","author":"van den oord","key":"ref3"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"4006","DOI":"10.21437\/Interspeech.2017-1452","article-title":"Tacotron: Towards end-to-end speech synthesis","author":"wang","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2016-33"},{"key":"ref5","article-title":"Char2Wav: End-to- end speech synthesis","author":"sotelo","year":"2017","journal-title":"Proc of the Int Conf on Learning Representations (ICLR)"},{"key":"ref8","article-title":"Deep voice 3: Scaling text-to-speech with convolutional sequence learning","author":"ping","year":"2018","journal-title":"Proc of the Int Conf on Learning Representations (ICLR)"},{"key":"ref7","first-page":"195","article-title":"Deep voice: Real-time neural text-to-speech","author":"arik","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2187195"},{"key":"ref9","first-page":"4480","article-title":"Transfer learning from speaker verification to multispeaker text-to-speech synthesis","author":"jia","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2006.05.003"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-014-2117-9"},{"key":"ref22","first-page":"4475","article-title":"Multi-speaker modeling and speaker adaptation for DNN-based TTS synthesis","author":"fan","year":"2015","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CHINSL.2008.ECP.14"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2017EDP7165"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953089"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2015EDP7457"},{"key":"ref25","first-page":"1001910029","article-title":"Neural voice cloning with a few samples","author":"arik","year":"2018","journal-title":"Advances in Neural IInformation Processing Systems"}],"event":{"name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2020,5,4]]},"location":"Barcelona, Spain","end":{"date-parts":[[2020,5,8]]}},"container-title":["ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9040208\/9052899\/09053642.pdf?arnumber=9053642","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T20:15:48Z","timestamp":1656360948000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9053642\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/icassp40776.2020.9053642","relation":{},"subject":[],"published":{"date-parts":[[2020,5]]}}}