{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,6]],"date-time":"2025-10-06T09:18:59Z","timestamp":1759742339701,"version":"3.41.0"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2017,1,12]],"date-time":"2017-01-12T00:00:00Z","timestamp":1484179200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multidim Syst Sign Process"],"published-print":{"date-parts":[[2018,1]]},"DOI":"10.1007\/s11045-017-0470-3","type":"journal-article","created":{"date-parts":[[2017,1,12]],"date-time":"2017-01-12T05:52:56Z","timestamp":1484200376000},"page":"385-403","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Speech naturalness improvement via $$\\mathrm {\\epsilon }$$ \u03f5 -closed extended vectors sets in voice conversion systems"],"prefix":"10.1007","volume":"29","author":[{"given":"Mohammad Javad","family":"Jannati","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abolghasem","family":"Sayadiyan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abolfazl","family":"Razi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,1,12]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Abe, M., Nakamura, S., Shikano, K., & Kuwabara, H. (1988). Voice conversion through vector quantization. In International conference on acoustics, speech, and signal processing, ICASSP-88 (Vol. 1, pp. 655\u2013658).","key":"470_CR1","DOI":"10.1109\/ICASSP.1988.196671"},{"doi-asserted-by":"crossref","unstructured":"Charlier, M., Ohtani, Y., Toda, T., Moinet, A., & Dutoit, T. (2009). Cross-language voice conversion based on eigenvoices. In 10th Annual conference of the international speech communication association, Brighton, UK, September 6\u201310, pp. 1635\u20131638.","key":"470_CR2","DOI":"10.21437\/Interspeech.2009-488"},{"issue":"12","key":"470_CR3","first-page":"1859","volume":"22","author":"L Chen","year":"2014","unstructured":"Chen, L., Ling, Z., Liu, L., & Dai, L. (2014). Voice conversion using deep neural networks with layer-wise generative training. IEEE ACM Transactions on Audio, Speech, and Language Processing, 22(12), 1859\u20131872.","journal-title":"IEEE ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"5","key":"470_CR4","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1016\/S1005-8885(14)60333-2","volume":"21","author":"X Chen","year":"2014","unstructured":"Chen, X., & Zhang, L. (2014). High-quality voice conversion system based on GMM statistical parameters and RBF neural network. The Journal of China Universities of Posts and Telecommunications, 21(5), 68\u201375.","journal-title":"The Journal of China Universities of Posts and Telecommunications"},{"doi-asserted-by":"crossref","unstructured":"Childers, D., Yegnanarayana, B., & Wu, K. (1985) Voice conversion: Factors responsible for quality. In IEEE international conference on acoustics, speech, and signal processing, ICASSP \u201985 (Vol. 10, pp. 748\u2013751).","key":"470_CR5","DOI":"10.1109\/ICASSP.1985.1168479"},{"doi-asserted-by":"crossref","unstructured":"Desai, S., Raghavendra, E. V., Yegnanarayana, B., Black, A. W., & Prahallad, K. (2009). Voice conversion using artificial neural networks. In IEEE international conference on acoustics, speech, and signal processing (pp. 3893\u20133896).","key":"470_CR6","DOI":"10.1109\/ICASSP.2009.4960478"},{"issue":"12","key":"470_CR7","doi-asserted-by":"crossref","first-page":"824","DOI":"10.1587\/elex.6.824","volume":"6","author":"R Doost","year":"2009","unstructured":"Doost, R., Sayadiyan, A., & Shamsi, H. (2009). A new perceptually weighted distance measure for vector quantization of the STFT amplitudes in the speech application. IEICE Electronics Express, 6(12), 824\u2013830.","journal-title":"IEICE Electronics Express"},{"doi-asserted-by":"crossref","unstructured":"Eide, E., & Picheny, M. (2006). Towards pooled-speaker concatenative text-to-speech. In IEEE International conference on acoustics, speech and signal processing, ICASSP \u201906 (Vol. 1, pp. 73\u201376).","key":"470_CR8","DOI":"10.1109\/ICASSP.2006.1659960"},{"doi-asserted-by":"crossref","unstructured":"Erro, D., & Moreno, A. (2007). Weighted frequency warping for voice conversion. In Annual conference of the international speech communication association, InterSpeech \u201907.","key":"470_CR9","DOI":"10.21437\/Interspeech.2007-550"},{"doi-asserted-by":"crossref","unstructured":"Erro, D., Navas, E., & Hern\u00e1ez, I. (2012). Iterative MMSE estimation of vocal tract length normalization factors for voice transformation. In 13th Annual conference of the international speech communication association, INTERSPEECH \u201912, Portland, Oregon, USA, September 9\u201313, pp. 86\u201389.","key":"470_CR10","DOI":"10.21437\/Interspeech.2012-32"},{"issue":"3","key":"470_CR11","doi-asserted-by":"crossref","first-page":"556","DOI":"10.1109\/TASL.2012.2227735","volume":"21","author":"D Erro","year":"2013","unstructured":"Erro, D., Navas, E., & Hernaez, I. (2013). Parametric voice conversion based on bilinear frequency warping plus amplitude scaling. IEEE Transactions on Audio, Speech, and Language Processing, 21(3), 556\u2013566.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"470_CR12","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1016\/j.specom.2014.12.004","volume":"67","author":"M Ghorbandoost","year":"2015","unstructured":"Ghorbandoost, M., Sayadiyan, A., Ahangar, M., Sheikhzadeh, H., Shahrebabaki, A. S., & Amini, J. (2015). Voice conversion based on feature combination with limited training data. Speech Communication, 67, 113\u2013128.","journal-title":"Speech Communication"},{"issue":"3","key":"470_CR13","doi-asserted-by":"crossref","first-page":"806","DOI":"10.1109\/TASL.2011.2165944","volume":"20","author":"E Helander","year":"2012","unstructured":"Helander, E., Silen, H., Virtanen, T., & Gabbouj, M. (2012). Voice conversion using dynamic kernel partial least squares regression. IEEE Transactions on Audio, Speech, and Language Processing, 20(3), 806\u2013817.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"5","key":"470_CR14","doi-asserted-by":"crossref","first-page":"912","DOI":"10.1109\/TASL.2010.2041699","volume":"18","author":"E Helander","year":"2010","unstructured":"Helander, E., Virtanen, T., Nurminen, J., & Gabbouj, M. (2010). Voice conversion using partial least squares regression. IEEE Transactions on Audio, Speech, and Language Processing, 18(5), 912\u2013921.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"470_CR15","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1109\/TASL.2007.911054","volume":"16","author":"Y Hu","year":"2008","unstructured":"Hu, Y., & Loizou, P. (2008). Evaluation of objective quality measures for speech enhancement. IEEE Transactions on Audio, Speech, and Language Processing, 16(1), 229\u2013238.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"doi-asserted-by":"crossref","unstructured":"Kain, A., & Macon, M. (1998). Spectral voice conversion for text-to-speech synthesis. In IEEE international conference on acoustics, speech and signal processing, ICASSP \u201998 (Vol. 1, pp. 285\u2013288).","key":"470_CR16","DOI":"10.1109\/ICASSP.1998.674423"},{"key":"470_CR17","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1016\/S0167-6393(98)00085-5","volume":"27","author":"H Kawahara","year":"1999","unstructured":"Kawahara, H., Masuda-Katsuse, I., & de Cheveign, A. (1999). Restructuring speech representations using a pitch-adaptive time frequency smoothing and an instantaneous frequency based f0 extraction: Possible role of a repetitive structure in sounds. Speech Communication, 27, 187\u2013207.","journal-title":"Speech Communication"},{"unstructured":"Kominek, J., & Black, A. W. (2004). The CMU Arctic speech databases. In Fifth ISCA workshop on speech synthesis.","key":"470_CR18"},{"issue":"2","key":"470_CR19","doi-asserted-by":"crossref","first-page":"641","DOI":"10.1109\/TASL.2006.876760","volume":"15","author":"K Lee","year":"2007","unstructured":"Lee, K. (2007). Statistical approach for voice personality transformation. IEEE Transactions on Audio, Speech and Language Processing, 15(2), 641\u2013651.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"issue":"1","key":"470_CR20","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1109\/TCOM.1980.1094577","volume":"28","author":"Y Linde","year":"1980","unstructured":"Linde, Y., Buzo, A., & Gray, R. (1980). An algorithm for vector quantizer design. IEEE Transactions on Communications, 28(1), 84\u201395.","journal-title":"IEEE Transactions on Communications"},{"issue":"15","key":"470_CR21","doi-asserted-by":"crossref","first-page":"1077","DOI":"10.1587\/elex.6.1077","volume":"6","author":"P Mowlaee","year":"2009","unstructured":"Mowlaee, P., Sayadiyan, A., & Sheikhzadeh, H. (2009). FDMSM robust signal representation for speech mixtures and noise corrupted audio signals. IEICE Electronics Express, 6(15), 1077\u20131083.","journal-title":"IEICE Electronics Express"},{"key":"470_CR22","doi-asserted-by":"crossref","first-page":"1909","DOI":"10.1587\/transinf.E93.D.1909","volume":"93","author":"K Nakamura","year":"2010","unstructured":"Nakamura, K., Toda, T., Saruwatari, H., & Shikano, K. (2010). Evaluation of extremely small sound source signals used in speaking-aid system with statistical voice conversion. IEICE Transactions on Information and Systems, 93, 1909\u20131917.","journal-title":"IEICE Transactions on Information and Systems"},{"issue":"3","key":"470_CR23","first-page":"580","volume":"23","author":"T Nakashika","year":"2015","unstructured":"Nakashika, T., Takiguchi, T., & Ariki, Y. (2015a). Voice conversion using RNN pre-trained by recurrent temporal restricted boltzmann machines. IEEE ACM Transactions on Audio, Speech, and Language Processing, 23(3), 580\u2013587.","journal-title":"IEEE ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"470_CR24","doi-asserted-by":"crossref","first-page":"8","DOI":"10.1186\/s13636-014-0044-3","volume":"2015","author":"T Nakashika","year":"2015","unstructured":"Nakashika, T., Takiguchi, T., & Ariki, Y. (2015). Voice conversion using speaker-dependent conditional restricted boltzmann machine. EURASIP Journal on Audio, Speech, and Music Processing, 2015(1), 8.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"470_CR25","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-84996-056-4","volume-title":"Speech dereverberation","author":"PA Naylor","year":"2010","unstructured":"Naylor, P. A., & Gaubitch, N. D. (2010). Speech dereverberation (1st ed.). London: Springer.","edition":"1"},{"issue":"1","key":"470_CR26","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1109\/89.221363","volume":"1","author":"K Paliwal","year":"1993","unstructured":"Paliwal, K., & Atal, B. (1993). Efficient vector quantization of LPC parameters at 24 bits\/frame. IEEE Transactions on Speech and Audio Processing, 1(1), 3\u201314.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"doi-asserted-by":"crossref","unstructured":"Ramachandran, R., & Mammone, R. (1995). Modern methods of speech processing (1st ed.). New York: Springer. ISSN: 0893-3405.","key":"470_CR27","DOI":"10.1007\/978-1-4615-2281-2"},{"doi-asserted-by":"crossref","unstructured":"Saino, K., Zen, H., Nankaku, Y., Lee, A., & Tokuda, K. (2006). An HMM-based singing voice synthesis system. In Ninth international conference on spoken language processing, INTERSPEECH \u201906, Pittsburgh, PA, USA, September 17\u201321.","key":"470_CR28","DOI":"10.21437\/Interspeech.2006-584"},{"key":"470_CR29","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4471-6407-4","volume-title":"Mathematical tools for data mining","author":"DA Simovici","year":"2014","unstructured":"Simovici, D. A., & Djeraba, C. (2014). Mathematical tools for data mining (2nd ed.). London: Springer.","edition":"2"},{"doi-asserted-by":"crossref","unstructured":"Streijl, R. C., Winkler, S., & Hands, D. S. (2016). Mean opinion score revisited: Methods and applications, limitations and alternatives. Multimedia Systems.","key":"470_CR30","DOI":"10.1007\/s00530-014-0446-1"},{"unstructured":"Stylianou, I. (1996). Harmonic plus noise models for speech, combined with statistical methods, for speech and speaker modification. Ph.D. thesis, Ecole Nationale Sup\u00e9rieure des T\u00e9l\u00e9communications.","key":"470_CR31"},{"doi-asserted-by":"crossref","unstructured":"Sundermann, D., Hoge, H., Bonafonte, A., Ney, H., Black, A., & Narayanan, S. (2006). Text-independent voice conversion based on unit selection. In IEEE international conference on acoustics, speech and signal processing, ICASSP \u201906 (Vol. 1, pp. 81\u201384).","key":"470_CR32","DOI":"10.1109\/ICASSP.2006.1659962"},{"doi-asserted-by":"crossref","unstructured":"Takashima, R., Takiguchi, T., & Ariki, Y. (2012). Exemplar based voice conversion in noisy environment. In Spoken language technology workshop (SLT) (pp. 313\u2013317).","key":"470_CR33","DOI":"10.1109\/SLT.2012.6424242"},{"doi-asserted-by":"crossref","unstructured":"Toda, T., Black, A., & Tokuda, K. (2005). Spectral conversion based on maximum likelihood estimation considering global variance of converted parameter. In IEEE international conference on acoustics, speech, and signal processing, ICASSP \u201905 (Vol. 1, pp. 9\u201312).","key":"470_CR34","DOI":"10.1109\/ICASSP.2005.1415037"},{"issue":"8","key":"470_CR35","doi-asserted-by":"crossref","first-page":"2222","DOI":"10.1109\/TASL.2007.907344","volume":"15","author":"T Toda","year":"2007","unstructured":"Toda, T., Black, A. W., & Tokuda, K. (2007). Voice conversion based on maximum-likelihood estimation of spectral parameter trajectory. IEEE Transactions on Audio, Speech and Language Processing, 15(8), 2222\u20132235.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"doi-asserted-by":"crossref","unstructured":"Toda, T., Ohtani, Y., & Shikano, K. (2006). Eigenvoice conversion based on Gaussian mixture model. In Ninth international conference on spoken language processing, INTERSPEECH \u201906, Pittsburgh, PA, USA, September 17\u201321, 2006.","key":"470_CR36","DOI":"10.21437\/Interspeech.2006-613"},{"doi-asserted-by":"crossref","unstructured":"Toda, T., Saruwatari, H., & Shikano, K. (2001). Voice conversion algorithm based on Gaussian mixture model with dynamic frequency warping of straight spectrum. In IEEE international conference on acoustics, speech, and signal processing, ICASSP \u201901 (Vol. 2, pp. 841\u2013844).","key":"470_CR37","DOI":"10.1109\/ICASSP.2001.941046"},{"key":"470_CR38","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1016\/0167-6393(92)90012-V","volume":"11","author":"H Valbret","year":"1992","unstructured":"Valbret, H., Moulines, E., & Tubach, J. (1992). Voice transformation using PSOLA technique. Speech Communication, 11, 175\u2013187.","journal-title":"Speech Communication"},{"issue":"10","key":"470_CR39","doi-asserted-by":"crossref","first-page":"1506","DOI":"10.1109\/TASLP.2014.2333242","volume":"22","author":"Z Wu","year":"2014","unstructured":"Wu, Z., Virtanen, T., Chng, E. S., & Li, H. (2014). Exemplar based sparse representation with residual compensation for voice conversion. IEEE Transactions on Speech and Audio Processing, 22(10), 1506\u20131521.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"unstructured":"Wu, Z., Virtanen, T., Kinnunen, T., Chng, E. S., & Li, H. (2013). Exemplarbased voice conversion using nonnegative spectrogram deconvolution. In 8th ISCA speech synthesis workshop.","key":"470_CR40"}],"container-title":["Multidimensional Systems and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11045-017-0470-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11045-017-0470-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11045-017-0470-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T10:13:32Z","timestamp":1749896012000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11045-017-0470-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,1,12]]},"references-count":40,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,1]]}},"alternative-id":["470"],"URL":"https:\/\/doi.org\/10.1007\/s11045-017-0470-3","relation":{},"ISSN":["0923-6082","1573-0824"],"issn-type":[{"type":"print","value":"0923-6082"},{"type":"electronic","value":"1573-0824"}],"subject":[],"published":{"date-parts":[[2017,1,12]]}}}