{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,22]],"date-time":"2026-03-22T06:55:56Z","timestamp":1774162556119,"version":"3.50.1"},"reference-count":30,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,12]]},"DOI":"10.1109\/slt.2014.7078543","type":"proceedings-article","created":{"date-parts":[[2015,4,3]],"date-time":"2015-04-03T15:03:55Z","timestamp":1428073435000},"page":"19-23","source":"Crossref","is-referenced-by-count":50,"title":["Voice conversion using deep neural networks with speaker-independent pre-training"],"prefix":"10.1109","author":[{"given":"Seyed Hamidreza","family":"Mohammadi","sequence":"first","affiliation":[]},{"given":"Alexander","family":"Kain","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177730491"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6855135"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2013-666","article-title":"Joint spectral distribution modeling using restricted boltzmann machines for voice conversion","author":"chen","year":"2013","journal-title":"InterSpeech"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ChinaSIP.2013.6625307"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"369","DOI":"10.21437\/Interspeech.2013-102","article-title":"Voice conversion in high-order eigen space using deep belief nets","author":"nakashika","year":"2013","journal-title":"InterSpeech"},{"key":"ref14","volume":"7","author":"titterington","year":"1985","journal-title":"Statistical Analysis of Finite Mixture Distributions"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/0893-6080(89)90020-8"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2012.05.027"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639215"},{"key":"ref19","first-page":"281","article-title":"Combining a vector space representation of linguistic context with a deep neural network for text-to-speech synthesis","author":"lu","year":"2013"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1177\/1745691610393980"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.907344"},{"key":"ref27","article-title":"Pylearn 2: a machine learning research library","author":"goodfellow","year":"2013","journal-title":"arXiv preprint arXiv 1308 4214"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1998.674423"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-77046-6_59"},{"key":"ref29","author":"kain","year":"2001","journal-title":"High resolution voice transformation"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(94)00058-I"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"1032","DOI":"10.21437\/Interspeech.2013-113","article-title":"Real-time voice conversion using artificial neural networks with rectified linear units","author":"azarov","year":"2013","journal-title":"InterSpeech"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2047683"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/89.661472"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2013.6661965"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639003"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2269291"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"ref21","first-page":"625","article-title":"Why does unsupervised pre-training help deep learning?","volume":"11","author":"erhan","year":"2010","journal-title":"The Journal of Machine Learning Research"},{"key":"ref24","year":"0","journal-title":"Speech Signal Processing Toolkit (SPTK)"},{"key":"ref23","first-page":"3371","article-title":"Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion","volume":"11","author":"vincent","year":"2010","journal-title":"The Journal of Machine Learning Research"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.366962"},{"key":"ref25","first-page":"833","article-title":"Contractive auto-encoders: Explicit invariance during feature extraction","author":"rifai","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"}],"event":{"name":"2014 IEEE Spoken Language Technology Workshop (SLT)","location":"South Lake Tahoe, NV, USA","start":{"date-parts":[[2014,12,7]]},"end":{"date-parts":[[2014,12,10]]}},"container-title":["2014 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7066250\/7078533\/07078543.pdf?arnumber=7078543","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T21:33:26Z","timestamp":1747863206000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7078543\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,12]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/slt.2014.7078543","relation":{},"subject":[],"published":{"date-parts":[[2014,12]]}}}