{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,6,23]],"date-time":"2023-06-23T22:10:22Z","timestamp":1687558222904},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2012,6,26]],"date-time":"2012-06-26T00:00:00Z","timestamp":1340668800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2012,9]]},"DOI":"10.1007\/s10772-012-9164-2","type":"journal-article","created":{"date-parts":[[2012,6,25]],"date-time":"2012-06-25T14:05:17Z","timestamp":1340633117000},"page":"419-431","source":"Crossref","is-referenced-by-count":7,"title":["A pitch synchronous approach to design voice conversion system using source-filter correlation"],"prefix":"10.1007","volume":"15","author":[{"given":"Rabul","family":"Hussain Laskar","sequence":"first","affiliation":[]},{"given":"Kalyan","family":"Banerjee","sequence":"additional","affiliation":[]},{"given":"Fazal Ahmed","family":"Talukdar","sequence":"additional","affiliation":[]},{"given":"K. Sreenivasa","family":"Rao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,6,26]]},"reference":[{"key":"9164_CR1","first-page":"655","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"M. Abe","year":"1988","unstructured":"Abe, M., Nakanura, S., Shikano, K., & Kuwabara, H. (1988). Voice conversion through vector quantization. In Proc. of int. conf. on acoustics, speech, and signal process (Vol.\u00a01, pp. 655\u2013658). New York: IEEE."},{"key":"9164_CR2","doi-asserted-by":"crossref","first-page":"439","DOI":"10.21437\/Eurospeech.1995-119","volume-title":"Proc. of Eurospeech","author":"M. Akagi","year":"1995","unstructured":"Akagi, M., & Ienaga, T. (1995). Speaker individualities in fundamental frequency contours and its control. In Proc. of Eurospeech (pp. 439\u2013442)."},{"issue":"3","key":"9164_CR3","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1016\/S0167-6393(99)00015-1","volume":"28","author":"L. M. Arslan","year":"1999","unstructured":"Arslan, L. M. (1999). Speaker transformation algorithm using segmental code books (STASC). Speech Communication, 28(3), 211\u2013226.","journal-title":"Speech Communication"},{"key":"9164_CR4","first-page":"1045","volume-title":"Proc. of int. conf. on spoken language process","author":"G. Baudoin","year":"1996","unstructured":"Baudoin, G., & Stylianou, Y. (1996). On the transformation of speech spectrum for voice conversion. In Proc. of int. conf. on spoken language process (Vol.\u00a03, pp. 1045\u20131048)."},{"key":"9164_CR5","first-page":"885","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"D. T. Chappel","year":"1998","unstructured":"Chappel, D. T., & Hansen, J. H. (1998). Speaker specific pitch contour modeling and modification. In Proc. of int. conf. on acoustics, speech, and signal process (Vol.\u00a02, pp. 885\u2013888). Seattle: IEEE."},{"key":"9164_CR6","doi-asserted-by":"crossref","first-page":"2413","DOI":"10.21437\/Eurospeech.2003-664","volume-title":"Proc. of Eurospeech","author":"Y. Chen","year":"2003","unstructured":"Chen, Y., Chu, M., Chang, E., Liu, J., & Runsheng, L. (2003). Voice conversion using smooth GMM and MAP adaptation. In Proc. of Eurospeech, Geneva (pp. 2413\u20132416)."},{"key":"9164_CR7","first-page":"143","volume":"1","author":"R. Collobert","year":"2001","unstructured":"Collobert, R., & Bengio, S. (2001). SVMTorch: support vector machines for large scale regression problems. Journal on Machine Learning, 1, 143\u2013160.","journal-title":"Journal on Machine Learning"},{"key":"9164_CR8","first-page":"555","volume-title":"Proc. of int. joint conf. on neural networks, IEEE, special session on least squares support vector machines","author":"F. P. Cruz","year":"2004","unstructured":"Cruz, F. P., & Rodr\u00edguez, A. A. (2004). Speeding up the IRWLS convergence to the SVM solution. In Proc. of int. joint conf. on neural networks, IEEE, special session on least squares support vector machines (Vol.\u00a04, pp. 555\u2013560)."},{"key":"9164_CR9","first-page":"757","volume-title":"Proc. of int. conf. on artificial neural networks","author":"F. P. Cruz","year":"2002","unstructured":"Cruz, F. P., Camps, G., Soria, E., Perez, J., Vidal, A. R. F., & Rodriguez, A. A. (2002). Multi-dimensional function approximation and regression estimation. In Proc. of int. conf. on artificial neural networks, Madrid, Spain (Vol.\u00a02, pp. 757\u2013762)."},{"issue":"1","key":"9164_CR10","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1162\/0899766052530875","volume":"17","author":"F. P. Cruz","year":"2005","unstructured":"Cruz, F. P., Calzon, C. B., & Rodriguez, A. A. (2005). Convergence of the IRWLS procedure to the support vector machine solution. Neural Computation, 17(1), 7\u201318.","journal-title":"Neural Computation"},{"issue":"5","key":"9164_CR11","doi-asserted-by":"crossref","first-page":"954","DOI":"10.1109\/TASL.2010.2047683","volume":"18","author":"S. Desai","year":"2010","unstructured":"Desai, S., Black, A. W., Yegnanarayana, B., & Prahallad, K. (2010). Spectral mapping using artificial neural networks for voice conversion. IEEE Transactions on Audio, Speech, and Language Processing, 18(5), 954\u2013964.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"3","key":"9164_CR12","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1109\/LSP.2009.2038507","volume":"17","author":"N. Dhananjaya","year":"2010","unstructured":"Dhananjaya, N., & Yegnarayana, B. (2010). Voiced\/nonvoiced detection based on robustness of voiced epochs. IEEE Signal Processing Letters, 17(3), 273\u2013276.","journal-title":"IEEE Signal Processing Letters"},{"key":"9164_CR13","doi-asserted-by":"crossref","first-page":"3793","DOI":"10.1109\/ICASSP.2009.4960453","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"T. Drugman","year":"2009","unstructured":"Drugman, T., Moinet, A., Dutoit, T., & Wilfart, G. (2009). Using a pitch synchronous residual codebook for hybrid HMM\/frame selection speech synthesis. In Proc. of int. conf. on acoustics, speech, and signal process (pp. 3793\u20133796). Taipei: IEEE."},{"issue":"8","key":"9164_CR14","doi-asserted-by":"crossref","first-page":"2298","DOI":"10.1109\/TSP.2004.831028","volume":"52","author":"M. S. Fernandez","year":"2004","unstructured":"Fernandez, M. S., Cumplido, M. P., Garc\u00eda, J. A., & Cruz, F. P. (2004). SVM multi-regression for nonlinear channel estimation in multiple-input multiple-output systems. IEEE Transactions on Signal Processing, 52(8), 2298\u20132307.","journal-title":"IEEE Transactions on Signal Processing"},{"issue":"9","key":"9164_CR15","doi-asserted-by":"crossref","first-page":"810","DOI":"10.1109\/LSP.2009.2025824","volume":"16","author":"P. K. Ghosh","year":"2009","unstructured":"Ghosh, P. K., & Narayanan, S. S. (2009). Pitch contour stylization using an optimal piecewise polynomial approximation. IEEE Signal Processing Letters, 16(9), 810\u2013813.","journal-title":"IEEE Signal Processing Letters"},{"issue":"1","key":"9164_CR16","doi-asserted-by":"crossref","first-page":"116","DOI":"10.4156\/jcit.vol6.issue1.14","volume":"6","author":"X. Han","year":"2011","unstructured":"Han, X., Zhao, X., Fang, T., & Jia, X. (2011). Research on EEDSVQ of LSF parameters based on voiced and unvoiced classification. Journal of Convergence Information Technology, 6(1), 116\u2013125.","journal-title":"Journal of Convergence Information Technology"},{"issue":"3","key":"9164_CR17","doi-asserted-by":"crossref","first-page":"806","DOI":"10.1109\/TASL.2011.2165944","volume":"20","author":"E. Helander","year":"2012","unstructured":"Helander, E., Silen, H., Virtanen, T., & Gabbouj, M. (2012). Voice conversion using dynamic kernel partial least squares regression. IEEE Transactions on Speech and Audio Processing, 20(3), 806\u2013817.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9164_CR18","unstructured":"Inanoglu, Z. (2003). Transforming pitch in a voice conversion framework. M.Phil. thesis, St. Edmund\u2019s College University of Cambridge. July, 2003."},{"key":"9164_CR19","first-page":"169","volume-title":"Advances in kernel methods-support vector learning","author":"T. Joachims","year":"1999","unstructured":"Joachims, T. (1999). Making large-scale SVM learning practical. In B. Scholkopf, C. Burges & A. Smola (Eds.), Advances in kernel methods-support vector learning (pp. 169\u2013184). Cambridge: MIT Press."},{"key":"9164_CR20","first-page":"285","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"A. Kain","year":"1998","unstructured":"Kain, A., & Macon, M. (1998). Spectral voice conversion for text-to-speech synthesis. In Proc. of int. conf. on acoustics, speech, and signal process (Vol.\u00a01, pp. 285\u2013288). New York: IEEE."},{"key":"9164_CR21","first-page":"813","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"A. Kain","year":"2001","unstructured":"Kain, A., & Macon, M. W. (2001). Design and evaluation of a voice conversion algorithm based on spectral envelop mapping and residual prediction. In Proc. of int. conf. on acoustics, speech, and signal process (Vol.\u00a02, pp. 813\u2013816). New York: IEEE."},{"issue":"3","key":"9164_CR22","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1016\/0167-6393(84)90016-5","volume":"3","author":"H. Kuwabara","year":"1984","unstructured":"Kuwabara, H. (1984). A pitch-synchronous analysis\/synthesis to independently modify formant frequencies and bandwidth for voiced speech. Speech Communication, 3(3), 211\u2013220.","journal-title":"Speech Communication"},{"issue":"2","key":"9164_CR23","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1016\/0167-6393(94)00053-D","volume":"16","author":"H. Kuwabara","year":"1995","unstructured":"Kuwabara, H., & Sagisaka, Y. (1995). Acoustics characteristics of speaker individuality: control and conversion. Speech Communication, 16(2), 165\u2013173.","journal-title":"Speech Communication"},{"issue":"4","key":"9164_CR24","first-page":"59","volume":"2","author":"R. H. Laskar","year":"2011","unstructured":"Laskar, R. H., Talukdar, F. A., Paul, B., & Chakrabarty, D. (2011). Sample reduction using recursive and segmented data structure analysis. Journal of Engineering and Computer Innovations, 2(4), 59\u201367.","journal-title":"Journal of Engineering and Computer Innovations"},{"issue":"2","key":"9164_CR25","doi-asserted-by":"crossref","first-page":"641","DOI":"10.1109\/TASL.2006.876760","volume":"15","author":"K.-S. Lee","year":"2007","unstructured":"Lee, K.-S. (2007). Statistical approach for voice personality transformation. IEEE Transactions on Audio, Speech, and Language Processing, 15(2), 641\u2013651.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9164_CR26","first-page":"1401","volume-title":"Proc. of int. conf. on spoken language process","author":"K. S. Lee","year":"1996","unstructured":"Lee, K. S., Youn, D. H., & Cha, I. W. (1996). A new voice personality transformation based on both linear and non-linear prediction analysis. In Proc. of int. conf. on spoken language process (pp. 1401\u20131404)."},{"key":"9164_CR27","first-page":"119","volume-title":"Proc. of int. speech comm. assoc., speech synthesis workshop","author":"L. Mesbahi","year":"2007","unstructured":"Mesbahi, L., Barreaud, V., & Boeffard, O. (2007). GMM-based speech transformation system under data reduction. In Proc. of int. speech comm. assoc., speech synthesis workshop (pp. 119\u2013124). Bonn, Germany."},{"issue":"1","key":"9164_CR28","doi-asserted-by":"crossref","first-page":"57","DOI":"10.2478\/v10187-010-0008-5","volume":"61","author":"A. Mousa","year":"2010","unstructured":"Mousa, A. (2010). Voice conversion using pitch shifting algorithm by time stretching with PSOLA and re-sampling. Journal of Electrical Engineering, 61(1), 57\u201361.","journal-title":"Journal of Electrical Engineering"},{"issue":"1","key":"9164_CR29","doi-asserted-by":"crossref","first-page":"52","DOI":"10.1109\/LSP.2005.860538","volume":"13","author":"K. S. R. Murthy","year":"2006","unstructured":"Murthy, K. S. R., & Yegnanarayana, B. (2006). Combining evidence from residual phase and MFCC features for speaker recognition. IEEE Signal Processing Letters, 13(1), 52\u201356.","journal-title":"IEEE Signal Processing Letters"},{"issue":"2","key":"9164_CR30","doi-asserted-by":"crossref","first-page":"206","DOI":"10.1016\/0167-6393(94)00058-I","volume":"16","author":"M. Narendranath","year":"1995","unstructured":"Narendranath, M., Murthy, H. A., Rajendran, S., & Yegnanarayana, B. (1995). Transformation of formants for voice conversion using artificial neural networks. Speech Communication, 16(2), 206\u2013216.","journal-title":"Speech Communication"},{"key":"9164_CR31","first-page":"101","volume-title":"Lecture notes in computer science","author":"P. Perrot","year":"2007","unstructured":"Perrot, P., Aversano, G., & Chollet, G. (2007). Voice disguise and automatic detection review and perspective. In Lecture notes in computer science (Vol.\u00a04391, pp. 101\u2013117). Berlin: Springer."},{"key":"9164_CR32","first-page":"185","volume-title":"Advances in kernel methods-support vector learning","author":"J. Platt","year":"1999","unstructured":"Platt, J. (1999). Fast training of support vector machines using sequential minimal optimization. In B. Scholkopf, C. Burges & A. Smola (Eds.), Advances in kernel methods-support vector learning (pp. 185\u2013208). Cambridge: MIT Press."},{"issue":"10","key":"9164_CR33","doi-asserted-by":"crossref","first-page":"1243","DOI":"10.1016\/j.specom.2006.06.002","volume":"48","author":"S. R. M. Prasanna","year":"2006","unstructured":"Prasanna, S. R. M., Gupta, C. S., & Yegnanarayana, B. (2006). Extraction of speaker-specific information from linear prediction residual of speech. Speech Communication, 48(10), 1243\u20131261.","journal-title":"Speech Communication"},{"issue":"3","key":"9164_CR34","doi-asserted-by":"crossref","first-page":"474","DOI":"10.1016\/j.csl.2009.03.003","volume":"24","author":"K. S. Rao","year":"2010","unstructured":"Rao, K. S. (2010). Voice conversion by mapping the speaker-specific features using pitch synchronous approach. Computer Speech & Language Processing, 24(3), 474\u2013494.","journal-title":"Computer Speech & Language Processing"},{"issue":"3","key":"9164_CR35","doi-asserted-by":"crossref","first-page":"972","DOI":"10.1109\/TSA.2005.858051","volume":"14","author":"K. S. Rao","year":"2006","unstructured":"Rao, K. S., & Yegnanarayana, B. (2006). Prosody modification using instants of significant excitation. IEEE Transactions on Audio, Speech, and Language Processing, 14(3), 972\u2013980.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"9164_CR36","first-page":"479","volume-title":"Lecture notes in computer sciences","author":"K. S. Rao","year":"2007","unstructured":"Rao, K. S., Laskar, R. H., & Koolagudi, S. G. (2007). Voice transformation by mapping the features at syllable level. In Lecture notes in computer sciences (Vol.\u00a04815, pp. 479\u2013486). Berlin: Springer."},{"issue":"2","key":"9164_CR37","doi-asserted-by":"crossref","first-page":"131","DOI":"10.1109\/89.661472","volume":"6","author":"Y. Stylianou","year":"1998","unstructured":"Stylianou, Y., Cappe, Y., & Moulines, E. (1998). Continuous probabilistic transform for voice conversion. IEEE Transactions on Speech and Audio Processing, 6(2), 131\u2013142.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9164_CR38","doi-asserted-by":"crossref","first-page":"676","DOI":"10.1109\/ASRU.2003.1318521","volume-title":"Proc. of automatic speech recognition and understanding workshop","author":"D. Suendermann","year":"2003","unstructured":"Suendermann, D., Ney, H., & Hoege, H. (2003). VTLN-based cross-language voice conversion. In Proc. of automatic speech recognition and understanding workshop (pp. 676\u2013681). New York: IEEE."},{"key":"9164_CR39","volume-title":"Proc. of ACL\/SEPLN 2004, 42nd annu. meeting assoc. for comput. Linguistics\/XX congreso de la sociedad espanola para el procesamiento del lenguaje natural","author":"D. Sundermann","year":"2004","unstructured":"Sundermann, D., Bonafonte, A., Hoge, H., & Ney, H. (2004). Voice conversion using exclusively unaligned training data. In Proc. of ACL\/SEPLN 2004, 42nd annu. meeting assoc. for comput. Linguistics\/XX congreso de la sociedad espanola para el procesamiento del lenguaje natural, Barcelona, Spain, July, 2004."},{"key":"9164_CR40","first-page":"13","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"D. Suendermann","year":"2005","unstructured":"Suendermann, D., Bonafonte, A., Ney, H., & Hoege, H. (2005a). A study on residual prediction techniques for voice conversion. In Proc. of int. conf. on acoustics, speech, and signal process (pp. 13\u201316). New York: IEEE."},{"key":"9164_CR41","doi-asserted-by":"crossref","first-page":"369","DOI":"10.1109\/ASRU.2005.1566484","volume-title":"Proc. of automatic speech recognition and understanding workshop","author":"D. Suendermann","year":"2005","unstructured":"Suendermann, D., Hoege, H., Bonafonte, A., Ney, H., & Black, A. (2005b). Residual prediction based on unit selection. In Proc. of automatic speech recognition and understanding workshop (pp. 369\u2013374). New York: IEEE."},{"key":"9164_CR42","first-page":"841","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"T. Toda","year":"2001","unstructured":"Toda, T., Saruwatari, H., & Shikano, K. (2001). Voice conversion algorithm based on Gaussian mixture model with dynamic frequency warping of STRAIGHT spectrum. In Proc. of int. conf. on acoustics, speech, and signal process (Vol.\u00a02, pp. 841\u2013844). New York: IEEE."},{"key":"9164_CR43","doi-asserted-by":"crossref","first-page":"1088","DOI":"10.21437\/Interspeech.2008-335","volume-title":"Proc. of interspeech","author":"A. Toth","year":"2008","unstructured":"Toth, A., & Black, A. W. (2008). Incorporating durational modification in voice transformation. In Proc. of interspeech, Brisbane, Australia (pp. 1088\u20131091)."},{"issue":"4","key":"9164_CR44","doi-asserted-by":"crossref","first-page":"441","DOI":"10.1016\/j.csl.2005.06.001","volume":"20","author":"O. Turk","year":"2006","unstructured":"Turk, O., & Arslan, L. M. (2006). Robust processing techniques for voice conversion. Computer Speech & Language Processing, 20(4), 441\u2013467.","journal-title":"Computer Speech & Language Processing"},{"key":"9164_CR45","first-page":"365","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"W. Verhelst","year":"1996","unstructured":"Verhelst, W., & Mertens, J. (1996). Voice conversion using partitions of spectral feature space. In Proc. of int. conf. on acoustics, speech, and signal process (Vol.\u00a01, pp. 365\u2013368). New York: IEEE."},{"issue":"13","key":"9164_CR46","doi-asserted-by":"crossref","first-page":"2772","DOI":"10.1016\/j.neucom.2007.09.008","volume":"71","author":"D. Wang","year":"2008","unstructured":"Wang, D., & Shi, L. (2008). Selecting valuable training samples for SVMs via data structure analysis. Neurocomputing, 71(13), 2772\u20132781.","journal-title":"Neurocomputing"},{"key":"9164_CR47","first-page":"9","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"H. Ye","year":"2004","unstructured":"Ye, H., & Young, S. (2004). High quality voice morphing. In Proc. of int. conf. on acoustics, speech, and signal process (Vol.\u00a0I, pp. 9\u201312). New York: IEEE."},{"key":"9164_CR48","first-page":"409","volume-title":"Proc. of int. conf. on acoustics, speech, and signal process","author":"B. Yegnanarayana","year":"2001","unstructured":"Yegnanarayana, B., Reddy, K. S., & Kishore, S. P. (2001). Source and system features for speaker recognition using AANN models. In Proc. of int. conf. on acoustics, speech, and signal process (Vol.\u00a01, pp. 409\u2013412). New York: IEEE."},{"issue":"4","key":"9164_CR49","doi-asserted-by":"crossref","first-page":"313","DOI":"10.1109\/89.701359","volume":"6","author":"B. Yegnarayana","year":"1998","unstructured":"Yegnarayana, B., & Veldhuis, R. N. J. (1998). Extraction of vocal-tract system characteristics from speech signals. IEEE Transactions on Speech and Audio Processing, 6(4), 313\u2013327.","journal-title":"IEEE Transactions on Speech and Audio Processing"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-012-9164-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-012-9164-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-012-9164-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,23]],"date-time":"2023-06-23T21:55:54Z","timestamp":1687557354000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-012-9164-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,6,26]]},"references-count":49,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2012,9]]}},"alternative-id":["9164"],"URL":"https:\/\/doi.org\/10.1007\/s10772-012-9164-2","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,6,26]]}}}