{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T16:52:48Z","timestamp":1762102368638},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,5]]},"DOI":"10.1109\/icassp.2019.8683682","type":"proceedings-article","created":{"date-parts":[[2019,4,16]],"date-time":"2019-04-16T20:07:22Z","timestamp":1555445242000},"page":"6950-6954","source":"Crossref","is-referenced-by-count":24,"title":["Multi-speaker Emotional Acoustic Modeling for CNN-based Speech Synthesis"],"prefix":"10.1109","author":[{"given":"Heejin","family":"Choi","sequence":"first","affiliation":[]},{"given":"Sangjun","family":"Park","sequence":"additional","affiliation":[]},{"given":"Jinuk","family":"Park","sequence":"additional","affiliation":[]},{"given":"Minsoo","family":"Hahn","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2017.8282231"},{"article-title":"Towards end-to-end prosody transfer for expressive speech synthesis with Tacotron","year":"2018","author":"skerry-ryan","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1121\/1.1915893"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"1118","DOI":"10.21437\/Interspeech.2017-314","article-title":"Speaker-dependent wavenet vocoder","author":"tamamori","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref15","article-title":"Deep Voice 3: Scaling text-to-speech with convolutional sequence learning","author":"ping","year":"2018","journal-title":"Proc ICLR"},{"key":"ref16","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"Proc ICML"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref18","article-title":"The htk book","author":"young","year":"2002","journal-title":"Entropic Cambridge Research Laboratory"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2016.7760664"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178817"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-589"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-172"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-506"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178816"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2017.8282282"},{"key":"ref1","first-page":"7962","article-title":"Statistical parametric speech synthesis using deep neural networks","author":"zen","year":"2013","journal-title":"Proc ICASSP"},{"article-title":"Pytorch: Tensors and dynamic neural networks in python with strong gpu acceleration","year":"2017","author":"paszke","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2015EDP7457"},{"key":"ref21","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc ICLR"},{"key":"ref24","article-title":"Multi-scale context aggregation by dilated convolutions","author":"yu","year":"2016","journal-title":"Proc ICLR"},{"article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","year":"2018","author":"wang","key":"ref23"},{"key":"ref25","article-title":"Language modeling with gated convolutional networks","author":"dauphin","year":"2017","journal-title":"Proc ICML"}],"event":{"name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2019,5,12]]},"location":"Brighton, United Kingdom","end":{"date-parts":[[2019,5,17]]}},"container-title":["ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8671773\/8682151\/08683682.pdf?arnumber=8683682","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,15]],"date-time":"2022-07-15T03:13:28Z","timestamp":1657854808000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8683682\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/icassp.2019.8683682","relation":{},"subject":[],"published":{"date-parts":[[2019,5]]}}}