{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:04:08Z","timestamp":1730297048319,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/slt.2018.8639588","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T23:36:34Z","timestamp":1550187394000},"page":"618-625","source":"Crossref","is-referenced-by-count":0,"title":["Hierarchical RNNs for Waveform-Level Speech Synthesis"],"prefix":"10.1109","author":[{"given":"Qingyun","family":"Dou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Moquan","family":"Wan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gilles","family":"Degottex","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyi","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mark J.F.","family":"Gales","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Adam: A method for stochastic optimization","year":"2014","author":"kingma","key":"ref33"},{"key":"ref32","article-title":"Waveform level synthesis","author":"dou","year":"2017","journal-title":"Mphil thesis"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2567384"},{"article-title":"A clockwork rnn","year":"2014","author":"koutnik","key":"ref30"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref34","first-page":"901","article-title":"Weight normalization: A simple reparameterization to accelerate training of deep neural networks","author":"salimans","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639225"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2165280"},{"key":"ref12","article-title":"Wavenet: A generative model for raw audio","author":"van den oord","year":"2016","journal-title":"CoRR abs\/1609 03499"},{"article-title":"Char2wav: End-to-end speech synthesis","year":"2017","author":"sotelo","key":"ref13"},{"article-title":"Natural tts synthesis by conditioning wavenet on mel spectrogram predictions","year":"2017","author":"shen","key":"ref14"},{"article-title":"Efficient neural audio synthesis","year":"2018","author":"kalchbrenner","key":"ref15"},{"article-title":"Generating sequences with recurrent neural networks","year":"2013","author":"graves","key":"ref16"},{"article-title":"Samplernn: An unconditional end-to-end neural audio generation model","year":"2016","author":"mehri","key":"ref17"},{"article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","year":"2014","author":"chung","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2016-35"},{"key":"ref4","article-title":"Simultaneous modeling of phonetic and prosodic parameters, and characteristic conversion for hmm-based text-to-speech systems","author":"yoshimura","year":"2002","journal-title":"PhD diss"},{"journal-title":"ITU","article-title":"Pulse code modulation (pcm) of voice frequencies","year":"1988","key":"ref27"},{"key":"ref3","article-title":"Atr v-talk speech synthesis system","author":"sagisaka","year":"1992","journal-title":"Proc ICSLP 1992"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2187195"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2835720"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.04.004"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2014.2359987"},{"key":"ref7","first-page":"88","article-title":"Statistical parametric speech synthesis with joint estimation of acoustic and excitation model parameters","author":"maia","year":"2010","journal-title":"SSW"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"373","DOI":"10.1109\/ICASSP.1996.541110","article-title":"Unit selection in a concatenative speech synthesis system using a large speech database","volume":"1","author":"hunt","year":"1996","journal-title":"Acoustics Speech and Signal Processing 1996 ICASSP-96 Conference Proceedings 1996 IEEE International Conference on"},{"key":"ref9","first-page":"7962","article-title":"Statistical parametric speech synthesis using deep neural networks","author":"zen","year":"2013","journal-title":"Acoustics Speech and Signal Processing (ICASSP) 2013 IEEE International Conference on"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(90)90021-Z"},{"key":"ref20","first-page":"1310","article-title":"On the difficulty of training recurrent neural networks","author":"pascanu","year":"2013","journal-title":"International Conference on Machine Learning"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.543"},{"key":"ref21","article-title":"Speech synthesis techniques using deep neural networks","author":"saxena","year":"2017","journal-title":"Blog Post"},{"journal-title":"Mixture density networks Neural computing research group technical report ncrg\/94\/004","year":"1994","author":"bishop","key":"ref24"},{"key":"ref23","article-title":"The ustc system for blizzard challenge 2017","author":"hu","year":"2017","journal-title":"Proc Blizzard Challenge Workshop"},{"article-title":"Pixel recurrent neural networks","year":"2016","author":"van den oord","key":"ref26"},{"key":"ref25","first-page":"1927","article-title":"Generative image modeling using spatial lstms","author":"theis","year":"2015","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2018 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2018,12,18]]},"location":"Athens, Greece","end":{"date-parts":[[2018,12,21]]}},"container-title":["2018 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8632666\/8639030\/08639588.pdf?arnumber=8639588","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T07:21:59Z","timestamp":1643268119000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8639588\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/slt.2018.8639588","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}