{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:42:30Z","timestamp":1774946550022,"version":"3.50.1"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6,6]]},"DOI":"10.1109\/icassp39728.2021.9414718","type":"proceedings-article","created":{"date-parts":[[2021,5,13]],"date-time":"2021-05-13T19:53:45Z","timestamp":1620935625000},"page":"5709-5713","source":"Crossref","is-referenced-by-count":45,"title":["Parallel Tacotron: Non-Autoregressive and Controllable TTS"],"prefix":"10.1109","author":[{"given":"Isaac","family":"Elias","sequence":"first","affiliation":[]},{"given":"Heiga","family":"Zen","sequence":"additional","affiliation":[]},{"given":"Jonathan","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Ye","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Ron J.","family":"Weiss","sequence":"additional","affiliation":[]},{"given":"Yonghui","family":"Wu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"9605","article-title":"An Intriguing Failing of Convolutional Neural Networks and the 
CoordConv Solution","author":"liu","year":"2018","journal-title":"Proc NeurIPS"},{"key":"ref38","article-title":"Attention Is All You Need","author":"vaswani","year":"2017","journal-title":"Proc NeurIPS"},{"key":"ref33","article-title":"Non-Autoregressive Neural Machine Translation","author":"gu","year":"2017"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052964"},{"key":"ref31","article-title":"Pay Less Attention with Lightweight and Dynamic Convolutions","author":"wu","year":"2019","journal-title":"Proc ICLR"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053520"},{"key":"ref37","article-title":"The Aligner: Text to Speech Alignment using Markov Models and a Pronunciation Dictionary","author":"talkin","year":"1994","journal-title":"ESCA\/IEEE SSW2"},{"key":"ref36","article-title":"Layer Normalization","author":"ba","year":"2016"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6413"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1149"},{"key":"ref10","first-page":"4601","article-title":"Professor Forcing: A New Algorithm for Training Recurrent Networks","author":"goyal","year":"2016","journal-title":"Proc NIPS"},{"key":"ref40","article-title":"Generating Sequences with Recurrent Neural Networks","author":"graves","year":"2013"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1972"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2935807"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2176"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054106"},{"key":"ref15","article-title":"FastSpeech: Fast, Robust and Controllable Text to Speech","author":"ren","year":"2019"},{"key":"ref16","article-title":"FastSpeech 2: Fast and High-Quality End-to-End Text to Speech","author":"ren","year":"2020"},{"key":"ref17","article-title":"TalkNet: 
Fully-Convolutional Non-Autoregressive Speech Synthesis Model","author":"beliaev","year":"2020"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2123"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054119"},{"key":"ref28","article-title":"Hierarchical Generative Modeling for Controllable Speech Synthesis","author":"hsu","year":"2019","journal-title":"Proc ICLR"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016706"},{"key":"ref27","first-page":"5167","article-title":"Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End Speech Synthesis","author":"wang","year":"2018","journal-title":"Proc ICML"},{"key":"ref3","article-title":"Deep Voice 3: 2000-Speaker Neural Text-to-Speech","author":"ping","year":"2018","journal-title":"Proc ICLR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2668"},{"key":"ref5","article-title":"WaveNet: A Generative Model for Raw Audio","author":"van den oord","year":"2016"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.2.270"},{"key":"ref7","article-title":"Neural Machine Translation by Jointly Learning to Align and Translate","author":"bahdanau","year":"2015","journal-title":"Proc ICLR"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"4006","DOI":"10.21437\/Interspeech.2017-1452","article-title":"Tacotron: Towards End-to-End Speech Synthesis","author":"wang","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref9","first-page":"1171","article-title":"Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks","author":"bengio","year":"2015","journal-title":"Proc NIPS"},{"key":"ref1","article-title":"Char2Wav: End-to-End Speech Synthesis","author":"sotelo","year":"2017","journal-title":"Proc 
ICLR"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054484"},{"key":"ref22","article-title":"FastPitch: Parallel Text-to-speech with Pitch Prediction","author":"łańcucki","year":"2020"},{"key":"ref21","article-title":"End-to-End Adversarial Text-to-Speech","author":"donahue","year":"2020"},{"key":"ref42","first-page":"2410","article-title":"Efficient Neural Audio Synthesis","author":"kalchbrenner","year":"2018","journal-title":"Proc ICML"},{"key":"ref24","article-title":"Non-Attentive Tacotron: Robust and Controllable Neural TTS Synthesis Including Unsupervised Duration Modeling","author":"shen","year":"2020"},{"key":"ref41","article-title":"Towards End-to-End Prosody Transfer for Expressive Speech Synthesis with Tacotron","author":"skerry-ryan","year":"2018","journal-title":"Proc ICML"},{"key":"ref23","article-title":"DurIAN: Duration informed attention network for multimodal synthesis","author":"yu","year":"2019"},{"key":"ref26","first-page":"7962","article-title":"Statistical Parametric Speech Synthesis Using Deep Neural Networks","author":"zen","year":"2013","journal-title":"Proc ICASSP"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"1039","DOI":"10.1016\/j.specom.2009.04.004","article-title":"Statistical Parametric Speech Synthesis","volume":"51","author":"zen","year":"2009","journal-title":"Speech Communication"}],"event":{"name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Toronto, ON, Canada","start":{"date-parts":[[2021,6,6]]},"end":{"date-parts":[[2021,6,11]]}},"container-title":["ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing 
(ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9413349\/9413350\/09414718.pdf?arnumber=9414718","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:40:56Z","timestamp":1652197256000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9414718\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,6]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/icassp39728.2021.9414718","relation":{},"subject":[],"published":{"date-parts":[[2021,6,6]]}}}