{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T05:39:49Z","timestamp":1775281189292,"version":"3.50.1"},"reference-count":42,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2013,10,1]],"date-time":"2013-10-01T00:00:00Z","timestamp":1380585600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2013,10]]},"DOI":"10.1109\/tasl.2013.2269291","type":"journal-article","created":{"date-parts":[[2013,6,18]],"date-time":"2013-06-18T18:02:01Z","timestamp":1371578521000},"page":"2129-2139","source":"Crossref","is-referenced-by-count":105,"title":["Modeling Spectral Envelopes Using Restricted Boltzmann Machines and Deep Belief Networks for Statistical Parametric Speech Synthesis"],"prefix":"10.1109","volume":"21","author":[{"given":"Zhen-Hua","family":"Ling","sequence":"first","affiliation":[]},{"given":"Li","family":"Deng","sequence":"additional","affiliation":[]},{"given":"Dong","family":"Yu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","author":"toda","year":"2011","journal-title":"Hidden Markov Models Theory and Applications"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/89.221363"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1162\/089976602760128018"},{"key":"ref31","author":"salakhutdinov","year":"2009","journal-title":"Learning deep generative models"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639225"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1111\/j.2517-6161.1986.tb01412.x","article-title":"On the statistical analysis of dirty pictures","volume":"48","author":"besag","year":"1986","journal-title":"J R Statist Soc Ser B"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1250\/ast.21.79"},{"key":"ref35","first-page":"455","article-title":"Multi-space probability distribution HMM (invited paper)","volume":"e85 d","author":"tokuda","year":"2002","journal-title":"IEICE Trans Inf Syst"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(92)90065-6"},{"key":"ref10","first-page":"89","article-title":"Minimum generation error training for HMM-based speech synthesis","author":"wu","year":"2006","journal-title":"Proc ICASSP"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2010.06.006"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-d.5.816"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2182511"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"2263","DOI":"10.21437\/Eurospeech.2001-539","article-title":"Mixed excitation for HMM-based speech synthesis","author":"yoshimura","year":"2001","journal-title":"Proc EUROSPEECH"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367011"},{"key":"ref15","first-page":"2034","article-title":"HMM-based unit selection using frame sized speech segments","author":"ling","year":"2006","journal-title":"Proc INTERSPEECH"},{"key":"ref16","author":"tokuda","year":"2004","journal-title":"Reformulating the HMM as a trajectory model Tech Rep of IEICE"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"121","DOI":"10.21437\/Interspeech.2011-31","article-title":"The effect of using normalized models in statistical speech synthesis","author":"shannon","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref18","first-page":"1332","article-title":"Improving Arabic HMM based speech synthesis quality","author":"abdel-hamid","year":"2006","journal-title":"Proc INTERSPEECH"},{"key":"ref19","first-page":"194","volume":"1","author":"smolensky","year":"1986","journal-title":"Parallel Distributed Processing"},{"key":"ref28","first-page":"7962","article-title":"Statistical parametric speech synthesis using deep neural networks","author":"zen","year":"2013","journal-title":"Proc ICASSP"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.861820"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1121\/1.1315288"},{"key":"ref3","author":"tokuda","year":"2004","journal-title":"Text to Speech Synthesis New Paradigms and Advances"},{"key":"ref6","article-title":"USTC system for blizzard challenge 2006: an improved HMM-based speech synthesis method","author":"ling","year":"2006","journal-title":"Proc Blizzard Challenge Workshop"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638996"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-1.1.325"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(98)00085-5"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.04.004"},{"key":"ref2","first-page":"2347","article-title":"Simultaneous modeling of spectrum, pitch and duration in HMM-based speech synthesis","author":"yoshimura","year":"1999","journal-title":"Proc EUROSPEECH"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2269291"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2006.01.002"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1162\/neco.2006.18.7.1527"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"1692","DOI":"10.21437\/Interspeech.2010-487","article-title":"Binary coding of speech spectrograms using a deep auto-encoder","author":"deng","year":"2010","journal-title":"Proc INTERSPEECH"},{"key":"ref21","doi-asserted-by":"crossref","DOI":"10.1201\/9781482276237","author":"deng","year":"2003","journal-title":"Speech Processing A Dynamic and Optimization-Oriented Approach"},{"key":"ref42","article-title":"Joint spectral distribution modeling using restricted Boltzmann machines for voice conversion","author":"chen","year":"2013","journal-title":"Proc INTERSPEECH"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2014796"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1121\/1.409839"},{"key":"ref25","article-title":"A deep neural network for acoustic-articulatory speech inversion","author":"uria","year":"2011","journal-title":"Proc Deep Learning and Unsupervised Feature Learning Workshop NIPS 2011"}],"container-title":["IEEE Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10376\/6565391\/06542729.pdf?arnumber=6542729","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,12]],"date-time":"2024-05-12T15:05:42Z","timestamp":1715526342000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6542729\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,10]]},"references-count":42,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tasl.2013.2269291","relation":{},"ISSN":["1558-7916","1558-7924"],"issn-type":[{"value":"1558-7916","type":"print"},{"value":"1558-7924","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,10]]}}}