{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:51:54Z","timestamp":1740099114163,"version":"3.37.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319995786"},{"type":"electronic","value":"9783319995793"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-99579-3_2","type":"book-chapter","created":{"date-parts":[[2018,8,24]],"date-time":"2018-08-24T07:36:09Z","timestamp":1535096169000},"page":"11-20","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A Continuous Vocoder Using Sinusoidal Model for Statistical Parametric Speech Synthesis"],"prefix":"10.1007","author":[{"given":"Mohammed Salah","family":"Al-Radhi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tam\u00e1s G\u00e1bor","family":"Csap\u00f3","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"G\u00e9za","family":"N\u00e9meth","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,8,25]]},"reference":[{"issue":"11","key":"2_CR1","doi-asserted-by":"publisher","first-page":"1039","DOI":"10.1016\/j.specom.2009.04.004","volume":"51","author":"H Zen","year":"2009","unstructured":"Zen, H., Tokuda, K., Black, A.: Statistical parameteric speech synthesis. Speech Commun. 51(11), 1039\u20131064 (2009)","journal-title":"Speech Commun."},{"issue":"3","key":"2_CR2","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/S0167-6393(98)00085-5","volume":"27","author":"H Kawahara","year":"1999","unstructured":"Kawahara, H., Masuda-Katsuse, I., de Cheveign, A.: Restructuring speech representations using a pitch-adaptive time\u2013frequency smoothing and an instantaneous-frequency-based F0 extraction: possible role of a repetitive structure in sounds. Speech Commun. 27(3), 187\u2013207 (1999)","journal-title":"Speech Commun."},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Drugman, T., Wilfart, G., Dutoit, T.: A deterministic plus stochastic model of the residual signal for improved parametric speech synthesis. In: 10th Proceedings of Interspeech, Brighton, pp. 1779\u20131782 (2009)","DOI":"10.21437\/Interspeech.2009-148"},{"issue":"E99-D","key":"2_CR4","doi-asserted-by":"publisher","first-page":"1877","DOI":"10.1587\/transinf.2015EDP7457","volume":"7","author":"M Morise","year":"2016","unstructured":"Morise, M., Yokomori, F., Ozawa, K.: WORLD: a vocoder-based high-quality speech synthesis system for real-time applications. IEICE Trans. Inf. Syst. 7(E99-D), 1877\u20131884 (2016)","journal-title":"IEICE Trans. Inf. Syst."},{"key":"2_CR5","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/978-3-319-25789-1_4","volume-title":"Statistical Language and Speech Processing","author":"TG Csap\u00f3","year":"2015","unstructured":"Csap\u00f3, T.G., N\u00e9meth, G., Cernak, M.: Residual-based excitation with continuous F0 modeling in HMM-based speech synthesis. In: Dediu, A.-H., Mart\u00edn-Vide, C., Vicsi, K. (eds.) SLSP 2015. LNCS (LNAI), vol. 9449, pp. 27\u201338. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-25789-1_4"},{"issue":"1","key":"2_CR6","doi-asserted-by":"publisher","first-page":"102","DOI":"10.1109\/LSP.2012.2231675","volume":"20","author":"PN Garner","year":"2013","unstructured":"Garner, P.N., Cernak, M., Motlicek, P.: A simple continuous pitch estimation algorithm. IEEE Signal Process. Lett. 20(1), 102\u2013105 (2013)","journal-title":"IEEE Signal Process. Lett."},{"issue":"10","key":"2_CR7","doi-asserted-by":"publisher","first-page":"1230","DOI":"10.1109\/LSP.2014.2332186","volume":"21","author":"T Drugman","year":"2014","unstructured":"Drugman, T., Stylianou, Y.: Maximum voiced frequency estimation: exploiting amplitude and phase spectra. IEEE Signal Process. Lett. 21(10), 1230\u20131234 (2014)","journal-title":"IEEE Signal Process. Lett."},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Al-Radhi, M.S., Csap\u00f3, T.G., N\u00e9meth, G.: Time-domain envelope modulating the noise component of excitation in a continuous residual-based vocoder for statistical parametric speech synthesis. In: 18th Proceedings of Interspeech, Stockholm, pp. 434\u2013438 (2017)","DOI":"10.21437\/Interspeech.2017-678"},{"issue":"4","key":"2_CR9","doi-asserted-by":"publisher","first-page":"744","DOI":"10.1109\/TASSP.1986.1164910","volume":"34","author":"RJ McAulay","year":"1986","unstructured":"McAulay, R.J., Quatieri, T.F.: Speech analysis\/synthesis based on a sinusoidal representation. IEEE Trans. Acoust. Speech Signal Process. 34(4), 744\u2013754 (1986)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Stylianou, Y., Laroche, J., Moulines, E.: High-quality speech modification based on a harmonic + noise model. In: Proceedings of Eurospeech, Madrid, pp. 451\u2013454 (1995)","DOI":"10.21437\/Eurospeech.1995-122"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Degottex, G., Stylianou, Y.: A full-band adaptive harmonic representation of speech. In: 13th Proceedings of Interspeech, Portland (2012)","DOI":"10.21437\/Interspeech.2012-138"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Hu, Q., Stylianou, Y., Maia, R., Richmond, K., Yamagishi, J.: Methods for applying dynamic sinusoidal models to statistical parametric speech synthesis. In: International Conference on Acoustics, Speech and Signal Processing (ICASSP), South Brisbane, pp. 4889\u20134893. IEEE (2015)","DOI":"10.1109\/ICASSP.2015.7178900"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Tokuda, K., Kobayashi, T., Masuko, T., Imai, S.: Mel-generalized cepstral analysis - a unified approach to speech spectral estimation. In: 8th International Conference on Spoken Language Processing (ICSLP), Yokohama, pp. 1043\u20131046 (1994)","DOI":"10.21437\/ICSLP.1994-275"},{"issue":"2","key":"2_CR14","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1002\/ecja.4400660203","volume":"66","author":"S Imai","year":"1983","unstructured":"Imai, S., Sumita, K., Furuichi, C.: Mel Log Spectrum Approximation (MLSA) filter for speech synthesis. Electron. Commun. Jpn. (Part I: Communications) 66(2), 10\u201318 (1983)","journal-title":"Electron. Commun. Jpn. (Part I: Communications)"},{"key":"2_CR15","first-page":"497","volume-title":"Speech Coding and Synthesis","author":"D Talkin","year":"1995","unstructured":"Talkin, D.: A robust algorithm for pitch tracking (RAPT). In: Kleijn, B., Paliwal, K. (eds.) Speech Coding and Synthesis, pp. 497\u2013518. Elsevier, Amesterdam (1995)"},{"issue":"6","key":"2_CR16","doi-asserted-by":"publisher","first-page":"552","DOI":"10.1109\/TASSP.1975.1162749","volume":"23","author":"L Rabiner","year":"1975","unstructured":"Rabiner, L., Sambur, M., Schmidt, C.: Applications of a nonlinear smoothing algorithm to speech processing. IEEE Trans. Acoust. Speech Signal Process. 23(6), 552\u2013557 (1975)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"2","key":"2_CR17","doi-asserted-by":"publisher","first-page":"184","DOI":"10.1109\/JSTSP.2013.2283471","volume":"8","author":"D Erro","year":"2014","unstructured":"Erro, D., Sainz, I., Navas, E., Hernaez, I.: Harmonics plus noise model based vocoder for statistical parametric speech synthesis. IEEE J. Sel. Top. Signal Process. 8(2), 184\u2013194 (2014)","journal-title":"IEEE J. Sel. Top. Signal Process."},{"key":"2_CR18","unstructured":"Kominek, J., Black, A.W.: CMU ARCTIC databases for speech synthesis. Language Technologies Institute (2003). http:\/\/festvox.org\/cmu_arctic\/"},{"issue":"5","key":"2_CR19","doi-asserted-by":"publisher","first-page":"713","DOI":"10.1007\/s12046-011-0043-3","volume":"36","author":"H Kawahara","year":"2011","unstructured":"Kawahara, H., Morise, M.: Technical foundations of TANDEM-STRAIGHT, a speech analysis, modification and synthesis framework. Sadhana 36(5), 713\u2013727 (2011)","journal-title":"Sadhana"},{"issue":"5","key":"2_CR20","doi-asserted-by":"publisher","first-page":"3387","DOI":"10.1121\/1.3097493","volume":"125","author":"J Ma","year":"2009","unstructured":"Ma, J., Hu, Y., Loizou, P.: Objective measures for predicting speech intelligibility in noisy conditions based on new band-importance functions. Acoust. Soc. Am. 125(5), 3387\u20133405 (2009)","journal-title":"Acoust. Soc. Am."},{"issue":"11","key":"2_CR21","doi-asserted-by":"publisher","first-page":"2009","DOI":"10.1109\/TASLP.2016.2585878","volume":"24","author":"J Jensen","year":"2016","unstructured":"Jensen, J., Taal, C.H.: An algorithm for predicting the intelligibility of speech masked by modulated noise maskers. IEEE\/ACM Trans. Audio Speech Lang. Process. 24(11), 2009\u20132022 (2016)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2_CR22","unstructured":"Itakura, F., Saito, S.: An analysis-synthesis telephony based on the maximum-likelihood method. In: Proceedings of the 6th International Congress on Acoustics, Tokyo, pp. C17\u2013C20 (1968)"},{"issue":"4","key":"2_CR23","doi-asserted-by":"publisher","first-page":"1218","DOI":"10.1109\/TSA.2005.860851","volume":"14","author":"J Chen","year":"2006","unstructured":"Chen, J., Benesty, J., Huang, Y., Doclo, S.: New insights into the noise reduction Wiener filter. IEEE Trans. Audio Speech Lang. Process. 14(4), 1218\u20131234 (2006)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2_CR24","unstructured":"ITU-R Recommendation BS.1534. Method for the subjective assessment of intermediate audio quality (2001)"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-99579-3_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,30]],"date-time":"2022-08-30T04:51:41Z","timestamp":1661835101000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-99579-3_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319995786","9783319995793"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-99579-3_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}