{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T19:56:30Z","timestamp":1774727790700,"version":"3.50.1"},"reference-count":29,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,3]]},"DOI":"10.1109\/icassp.2017.7953089","type":"proceedings-article","created":{"date-parts":[[2017,6,20]],"date-time":"2017-06-20T21:35:36Z","timestamp":1497994536000},"page":"4905-4909","source":"Crossref","is-referenced-by-count":36,"title":["Adapting and controlling DNN-based speech synthesis using input codes"],"prefix":"10.1109","author":[{"given":"Hieu-Thi","family":"Luong","sequence":"first","affiliation":[]},{"given":"Shinji","family":"Takaki","sequence":"additional","affiliation":[]},{"given":"Gustav Eje","family":"Henter","sequence":"additional","affiliation":[]},{"given":"Junichi","family":"Yamagishi","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472730"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2016-33"},{"key":"ref12","first-page":"294","article-title":"The HMM-based speech synthesis system (HTS) version 2.0","author":"heiga","year":"2007","journal-title":"Proc of SSW6"},{"key":"ref13","article-title":"A study of speaker adaptation for DNN-based speech synthesis","author":"wu","year":"0","journal-title":"Proceedings of Interspeech 2015"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078569"},{"key":"ref15","first-page":"4475","article-title":"Multi-speaker modeling and speaker adaptation for DNN-based TTS synthesis","author":"yuchen","year":"2015","journal-title":"Proc ICASSP"},{"key":"ref16","first-page":"2217","article-title":"Sentence-level control vectors for deep neural network speech 
synthesis","author":"watts","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"1713","DOI":"10.1109\/TASLP.2014.2346313","article-title":"Fast adaptation of deep neural network based on discriminant codes for speech recognition","volume":"22","author":"xue","year":"2014","journal-title":"IEEE\/ACM T Audio Speech"},{"key":"ref18","article-title":"Preliminary work on speaker adaptation for DNN-based speech synthesis","author":"potard","year":"2015","journal-title":"Tech Rep Idiap-RR-02\u20132015 Idiap Research Institute"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-589"},{"key":"ref28","first-page":"230","article-title":"Applying conditional random fields to Japanese morphological analysis","author":"kudo","year":"2004","journal-title":"Proc EMNLP"},{"key":"ref4","first-page":"489","article-title":"HMM-based style control for expressive speech synthesis with arbitrary speaker's voice using model adaptation","volume":"92","author":"takashi","year":"2009","journal-title":"IEICE -Trans Inf Syst"},{"key":"ref27","first-page":"1315","article-title":"Speech parameter generation algorithms for HMM-based speech synthesis","author":"tokuda","year":"2000","journal-title":"Proc ICASSP"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e88-d.11.2484"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2014.2359987"},{"key":"ref29","first-page":"1964","article-title":"TTS synthesis with bidirectional LSTM based recurrent neural networks","author":"fan","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref5","first-page":"7962","article-title":"Statistical parametric speech synthesis using deep neural networks","author":"zen","year":"2013","journal-title":"Proc ICASSP"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472736"},{"key":"ref7","first-page":"5130","article-title":"Robust TTS duration modelling using 
DNNs","author":"gustav","year":"2016","journal-title":"Proc ICASSP"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-d.9.1406"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-390"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-d.2.533"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639211"},{"key":"ref22","first-page":"3511","article-title":"Recurrent neural network language model adaptation for multi-genre broadcast speech recognition","author":"xie","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref21","first-page":"5280","article-title":"Speaker-aware training of LSTM-RNNs for acoustic modelling","author":"tan","year":"2016","journal-title":"Proc ICASSP"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-506"},{"key":"ref23","first-page":"234","article-title":"RecNorm: Simultaneous normalisation and classification applied to speech recognition","author":"bridle","year":"1990","journal-title":"Proc NIPS"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1250\/ast.27.349"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6855139"}],"event":{"name":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"New Orleans, LA","start":{"date-parts":[[2017,3,5]]},"end":{"date-parts":[[2017,3,9]]}},"container-title":["2017 IEEE International Conference on Acoustics, Speech and Signal Processing 
(ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7943262\/7951776\/07953089.pdf?arnumber=7953089","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,26]],"date-time":"2019-09-26T11:24:47Z","timestamp":1569497087000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7953089\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,3]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/icassp.2017.7953089","relation":{},"subject":[],"published":{"date-parts":[[2017,3]]}}}