{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,5,7]],"date-time":"2023-05-07T16:40:25Z","timestamp":1683477625397},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2005,9,1]],"date-time":"2005-09-01T00:00:00Z","timestamp":1125532800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2005,9]]},"DOI":"10.1007\/s10772-006-5692-y","type":"journal-article","created":{"date-parts":[[2006,6,2]],"date-time":"2006-06-02T08:54:46Z","timestamp":1149238486000},"page":"227-245","source":"Crossref","is-referenced-by-count":6,"title":["Parametric Formant Modelling and Transformation in Voice Conversion"],"prefix":"10.1007","volume":"8","author":[{"given":"Dimitrios","family":"Rentzos","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Saeed","family":"Vaseghi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qin","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ching-Hsiang","family":"Ho","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2006,6,2]]},"reference":[{"key":"5692_CR1","doi-asserted-by":"crossref","unstructured":"Abe, M., Nakamura, S., Shikano, K., and Kuwabara, H. (1988). Voice conversion through vector quantization, In Proceedings of ICASSP 1998, pp. 565\u2013568.","DOI":"10.1109\/ICASSP.1988.196671"},{"key":"5692_CR2","unstructured":"Acero, A. (1999). Formant analysis and synthesis using hidden markov models, In Proc. of the Eurospeech Conference, Volume 3, Page 1047\u20131050."},{"key":"5692_CR3","volume-title":"From Text to Speech: The MITalk System","author":"J. Allen","year":"1987","unstructured":"Allen, J. Hunnicutt, S. Klatt, D. (1987). From Text to Speech: The MITalk System. Cambridge, Cambridge University Press."},{"key":"5692_CR4","doi-asserted-by":"crossref","unstructured":"Arslan L.M. and Talkin, D. (1997). Voice Conversion by codebook mapping of line spectral frequencies and excitation spectrum, EUROSPEECH 1997 Proceedings.","DOI":"10.21437\/Eurospeech.1997-383"},{"key":"5692_CR5","unstructured":"Bazzi, I., Acero, A., and Deng, Li. (2003). An expectation maximazation approach for Formant Tracking Using a Parameter-free Non-Linear Predictor. In Proc. ICASSP 2003, pp. 464\u2013467."},{"issue":"July","key":"5692_CR6","first-page":"1","volume":"8","author":"J.E. Cahn","year":"1990","unstructured":"Cahn, J.E. (1990). The generation of affect in synthesized speech, Journal of the American Voice I\/O Society, 8(July): 1\u201319.","journal-title":"Journal of the American Voice I\/O Society"},{"key":"5692_CR7","doi-asserted-by":"crossref","first-page":"481","DOI":"10.1016\/0167-6393(91)90051-T","volume":"10","author":"R. Carlson","year":"1991","unstructured":"Carlson, R., Granstrom, B., and Karlsson, I. (1991). Experiments with voice modelling in speech synthesis. Speech Communication, 10: 481\u2013489.","journal-title":"Speech Communication"},{"key":"5692_CR8","unstructured":"Carlson, R., Sigvardson, T. and Arvid, Sjolander. (2002). Data-driven formant synthesis, TMH-QPSR Vol.44 \u2013 Fonetik 2002."},{"key":"5692_CR9","unstructured":"Chen, Y., Chu, M., Chang, E., Liu, J., and Liu, R. (2003). Voice conversion with smoothed gmm and map adaptation, In Proc. Eurospeech 2003, pp. 2413\u20132416."},{"key":"5692_CR10","doi-asserted-by":"crossref","unstructured":"De Boor, C. (1978). A Practical Guide to Splines, Springer-Verlag.","DOI":"10.1007\/978-1-4612-6333-3"},{"key":"5692_CR11","unstructured":"Edrington, M. Lowry, A. Jackson, P. Breen, A. Minnis, S. (1998), Overview of Current Text-to-Speech Techniques: Part II - Prosody and Speech Generation, in Speech Technology for Telecommunications, Chapman & Hall, London, UK."},{"key":"5692_CR12","doi-asserted-by":"crossref","first-page":"393","DOI":"10.1016\/S0095-4470(19)30714-4","volume":"14","author":"G. Fant","year":"1986","unstructured":"Fant G. (1986), Glottal flow: Models and interaction, Journal of Phonetics, 14: 393\u2013399.","journal-title":"Journal of Phonetics"},{"key":"5692_CR13","volume-title":"Digital Speech Processing, Synthesis, and Recognition","author":"S. Furui","year":"1989","unstructured":"Furui, S. (1989). Digital Speech Processing, Synthesis, and Recognition, Marcel Dekker, New York."},{"key":"5692_CR14","doi-asserted-by":"crossref","unstructured":"Ho, C.H., Rentzos, D. Vaseghi, and S. (2002). Formant model estimation and transformation for voice morphing. In Proc. ICSLP, pp. 2149\u20132152.","DOI":"10.21437\/ICSLP.2002-587"},{"key":"5692_CR15","doi-asserted-by":"crossref","unstructured":"Holmes, J. Holmes, W. and Garner, P. (1997). Using formant frequencies in speech recognition. In Proc. Eurospeech-97, vol. 4, pp. 2083\u20132086.","DOI":"10.21437\/Eurospeech.1997-551"},{"key":"5692_CR16","doi-asserted-by":"crossref","unstructured":"Horne, M. (ed). (2000), Prosody: Theory and Experiment. Studies Presented to G\u00f6sta Bruce. Kluwer Academic Publishers, Dordrecht.","DOI":"10.1007\/978-94-015-9413-4"},{"key":"5692_CR17","doi-asserted-by":"crossref","unstructured":"Iwahashi N. and Sagisaka, Y. (1994). Speech Spectrum transformation by speaker interpolation, In Proceedings IEEE Int. Conference Acoustics, Speech Signal Processing.","DOI":"10.1109\/ICASSP.1994.389256"},{"key":"5692_CR18","doi-asserted-by":"crossref","unstructured":"Kain, A and Macon, M.W. (1998). Spectral voice conversion for text-to-speech synthesis. Proceedings of ICASSP, vol. 1, pp. 285\u2013288.","DOI":"10.1109\/ICASSP.1998.674423"},{"key":"5692_CR19","doi-asserted-by":"crossref","unstructured":"Kopec, D.H. (1986). Formant tracking using hidden Markov models and vector quantisation. IEEE Trans on Acoust., Speech, Signal Processing, Vol. ASSP-34, No 4, pp. 709\u2013729.","DOI":"10.1109\/TASSP.1986.1164908"},{"key":"5692_CR20","unstructured":"Kuwabara, H. and Sagisaka, Y. (1995). Acoustic characteristics of speaker individuality: Control and Conversion. 16: 165\u2013173, Feb."},{"key":"5692_CR21","unstructured":"Lee, M. van Santen, J. Mobius, B. Olive, J. (1999). Formant tracking using segmental phonemic information\u201d In Proceedings of the Eurospeech 1999, vol. 6, 2789\u20132792."},{"key":"5692_CR22","unstructured":"McAulay, R.J. and Quatieri, T.F. (1995). Sinusoidal coding, in speech coding and synthesis. In W.B. Kleijn and K.K. Paliwal, (Eds.) Elsevier Science, Hol, 4, pp. 121\u2013173."},{"key":"5692_CR23","doi-asserted-by":"crossref","first-page":"453","DOI":"10.1016\/0167-6393(90)90021-Z","volume":"9","author":"E. Moulines","year":"1990","unstructured":"Moulines, E. and Charpentier, F. (1990). Pitch-synchronous waveform processing techniques for text-to-speech synthesis using diphones, Speech Communication, 9: 453\u2013467.","journal-title":"Speech Communication"},{"key":"5692_CR24","doi-asserted-by":"crossref","unstructured":"Rao, A. and Kumaresan, R. (2000), On decomposing speech into modulated components. IEEE Trans. Speech and Audio Proc. 8(3): 240\u2013254.","DOI":"10.1109\/89.841207"},{"key":"5692_CR25","unstructured":"Rabiner L, Juang BH. (1993). Fundamentals of speech recognition, Prentice Hall, Englewood Cliffs."},{"key":"5692_CR26","unstructured":"Slaney, M., Covell, M., and Lassiter, B. (1996). Automatic audio morphing, In Proceedings of the 1996 ICASSP, Vol. 2 pp. 1001\u20131004."},{"key":"5692_CR27","unstructured":"Styger, T and Keller E. (1994). Formant synthesis. In E. Keller (Ed.), Fundamentals in Speech Synthesis and Speech Recognition, pp. 109\u2013128. Wiley."},{"key":"5692_CR28","doi-asserted-by":"crossref","unstructured":"Stylianou, Y., Cappe, O., and Moulines, E. (1998). Continuous Probabilistic Transform for Voice Conversion, IEEE transactions on speech & audio processing, Vol.6, No.2, pp. 131\u2013142.","DOI":"10.1109\/89.661472"},{"key":"5692_CR29","doi-asserted-by":"crossref","unstructured":"Tang, M., C. Wang, and S. Seneff, (2001). Voice transformations: from speech synthesis to mammalian vocalizations. In Proceedings of the 7th European Conference on Speech Communication and Technology, Denmark 2001.","DOI":"10.21437\/Eurospeech.2001-109"},{"key":"5692_CR30","unstructured":"Turk, O. and Arslan, L.M. (2002). Subband based voice conversion, In Proceedings of the 2002 International Conference on Spoken Language Processing, pp. 289\u2013292."},{"key":"5692_CR31","doi-asserted-by":"crossref","unstructured":"Valbret H., Moulines, E. and Tubach, J.P. (1992). Voice transformation using PSOLA techniques, Speech Communication, vol. 11, pp. 175\u2013187.","DOI":"10.1016\/0167-6393(92)90012-V"},{"key":"5692_CR32","doi-asserted-by":"crossref","unstructured":"Weber K., Ikbal S., Bengio S., and Bourlard H., (2003). Robust speech recognition and feature extraction using HMM2, Computer Speech and Language 17, pp. 195\u2013211.","DOI":"10.1016\/S0885-2308(03)00012-3"},{"key":"5692_CR33","unstructured":"Woodland, P.C. and Young, S.J. (1993). The HTK Continuous Speech Recogniser. Proceedings Eurospeech 1993, pp. 2207\u20132219."},{"key":"5692_CR34","doi-asserted-by":"crossref","unstructured":"Xia, K. and Espy-Wilson, C. (2000). A new strategy of formant tracking based on dynamic programming. Intern. Conf. on Spoken Language Processing, Oct. 2000, pp. III 55\u201358.","DOI":"10.21437\/ICSLP.2000-476"},{"key":"5692_CR35","doi-asserted-by":"crossref","unstructured":"Yan, Q., Vaseghi, S., Ho, C.H., Rentzos, D., Turajlic, E. (2003). Comparative analysis and synthesis of formant trajectories of british and broad australian accents. Proceedings of Eurospeech 2003, pp. 2941\u20132944.","DOI":"10.21437\/Eurospeech.2003-590"},{"key":"5692_CR36","doi-asserted-by":"crossref","unstructured":"Yegnanarayana, B. and Veldhuis R.N.J.(1998). Extraction of vocal-tract system characteristics from speech signal. IEEE Trans. On Speech and Audio Processing, vol. 6, pp. 313\u2013327.","DOI":"10.1109\/89.701359"},{"key":"5692_CR37","unstructured":"Zhan P. & Westphal, M. (1997). Speaker normalisation based on frequency warping in proceedings of ICASSP 1997, pp. 1039\u20131042."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-006-5692-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-006-5692-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-006-5692-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,7]],"date-time":"2023-05-07T16:11:45Z","timestamp":1683475905000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-006-5692-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2005,9]]},"references-count":37,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2005,9]]}},"alternative-id":["5692"],"URL":"https:\/\/doi.org\/10.1007\/s10772-006-5692-y","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2005,9]]}}}