{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T04:04:26Z","timestamp":1751947466546,"version":"3.41.2"},"reference-count":34,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,7]]},"DOI":"10.1109\/iwobi.2018.8464204","type":"proceedings-article","created":{"date-parts":[[2018,9,13]],"date-time":"2018-09-13T21:48:06Z","timestamp":1536875286000},"page":"1-7","source":"Crossref","is-referenced-by-count":3,"title":["Pre-training Long Short-term Memory Neural Networks for Efficient Regression in Artificial Speech Postfiltering"],"prefix":"10.1109","author":[{"given":"Marvin","family":"Coto-Jimenez","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","first-page":"223","article-title":"The CMU Arctic speech databases","author":"kominek","year":"2004","journal-title":"Proc Fifth ISCA Speech Synthesis Workshop"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CIFER.2003.1196237"},{"journal-title":"The HTS Group HMM\/DNN-based Speech Synthesis System (HTS)","year":"0","key":"ref31"},{"key":"ref30","first-page":"115","volume":"3","author":"gers","year":"2002","journal-title":"Learning precise timing with LSTM recurrent networks J Mach Learn Res"},{"key":"ref34","first-page":"29","article-title":"MFCC+F0 extraction and waveform reconstruction using HNM: preliminary results in an HMM-based synthesizer","author":"erro","year":"2010","journal-title":"VI Jornadas en Tecnologia del Habla & II Iberian SLTech (FALA)"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1250\/ast.39.163"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1109\/TASLP.2017.2761547","article-title":"Statistical Parametric Speech Synthesis Incorporating Generative Adversarial Networks","author":"saito","year":"2018","journal-title":"Audio Speech and Language Processing IEEE\/ACM Transactions on"},{"key":"ref12","article-title":"Understanding the difficulty of training deep feedforward neural networks","author":"glorot","year":"2010","journal-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"ref14","first-page":"625","article-title":"Why does unsupervised pre-training help deep learning?","author":"erhan","year":"2010","journal-title":"Journal of Machine Learning Research 11 Feb"},{"key":"ref15","article-title":"Transfer learning by supervised pre-training for audio-based music classification","author":"van","year":"2014","journal-title":"Conference of the International Society for Music Information Retrieval (ISMIR 2014)"},{"key":"ref16","article-title":"Decaf: A deep convolutional activation feature for generic visual recognition","author":"donahue","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707741"},{"key":"ref18","article-title":"Modified post-filter to recover modulation spectrum for HMM-based speech synthesis","author":"takamichi","year":"2014","journal-title":"IEEE Global Conference on Signal and Information Processing"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"755","DOI":"10.1109\/TASLP.2016.2522655","article-title":"Postfilters to modify the modulation spectrum for statistical parametric speech synthesis","volume":"24","author":"takamichi","year":"2016","journal-title":"Proceedings of the IEEE\/ACM Transactions on Audio Speech and Language Processing (TASLP)"},{"key":"ref28","article-title":"Bidirectional LSTM networks for improved phoneme classification and recognition","volume":"3697","author":"graves","year":"0","journal-title":"Artificial Neural Networks Formal Models and Their Applications - ICANN 2005 Lecture Notes in Computer Science"},{"key":"ref4","first-page":"805","article-title":"Adaptation of pitch and spectrum for HMM-based speech synthesis using MLLR","volume":"2","author":"tamura","year":"2001","journal-title":"Proceedings of IEEE Acoustics Speech and Signal Processing"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"sepp","year":"1997","journal-title":"Neural Computation"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"2347","DOI":"10.21437\/Eurospeech.1999-596","article-title":"Simultaneous modeling of spectrum, pitch and duration in HMM-based speech synthesis","author":"yoshimura","year":"1999","journal-title":"Proceedings of the Sixth European Conference on Speech Communication and Technology"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639215"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"ref5","first-page":"415","article-title":"Hidden Markov Models for Artificial Voice Production and Accent Modification","author":"coto-jim\u00e9nez","year":"2016","journal-title":"Proc of the Ibero-American Conference on Artificial Intelligence"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854321"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178814"},{"key":"ref2","first-page":"93","author":"holmes","year":"2001","journal-title":"Speech Synthesis and Recognition"},{"key":"ref9","article-title":"DNN-based stochastic postfilter for HMM-based speech synthesis","author":"chen","year":"2014","journal-title":"Proceedings of Interspeech"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2013.2251852"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2461448"},{"key":"ref22","article-title":"Voice conversion in high-order eigen space using deep belief nets","author":"nakashika","year":"2013","journal-title":"Proceedings of Interspeech"},{"key":"ref21","article-title":"A postfilter to modify the modulation spectrum in HMM-based speech synthesis","author":"takaichi","year":"2014","journal-title":"Proc IEEE Conf Acoustics Speech Signal Proc (ICASSP)"},{"key":"ref24","article-title":"Improving Automatic Speech Recognition Containing Additive Noise Using Deep Denoising Autoencoders of LSTM Networks","volume":"9811","author":"coto-jim\u00e9nez","year":"0","journal-title":"Speech and Computer SPECOM 2016 Lecture Notes in Computer Science"},{"key":"ref23","article-title":"EESEN: End-to-end speech recognition using deep RNN models and WFST-based decoding","author":"yajie","year":"2015","journal-title":"Automatic Speech Recognition and Understanding (ASRU) 2015 IEEE Workshop on IEEE"},{"key":"ref26","article-title":"TTS synthesis with bidirectional LSTM based recurrent neural networks","author":"fan","year":"2014","journal-title":"Proceedings of Interspeech"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1142\/S021800141860008X"}],"event":{"name":"2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI)","start":{"date-parts":[[2018,7,18]]},"location":"San Carlos","end":{"date-parts":[[2018,7,20]]}},"container-title":["2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8451008\/8464128\/08464204.pdf?arnumber=8464204","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T06:25:28Z","timestamp":1751869528000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8464204\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,7]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/iwobi.2018.8464204","relation":{},"subject":[],"published":{"date-parts":[[2018,7]]}}}