{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T15:59:20Z","timestamp":1761580760994,"version":"3.37.3"},"reference-count":85,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2011,1]]},"DOI":"10.1109\/tasl.2010.2045239","type":"journal-article","created":{"date-parts":[[2010,3,12]],"date-time":"2010-03-12T13:56:47Z","timestamp":1268402207000},"page":"153-165","source":"Crossref","is-referenced-by-count":116,"title":["HMM-Based Speech Synthesis Utilizing Glottal Inverse Filtering"],"prefix":"10.1109","volume":"19","author":[{"given":"Tuomo","family":"Raitio","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Antti","family":"Suni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junichi","family":"Yamagishi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hannu","family":"Pulakka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jani","family":"Nurminen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martti","family":"Vainio","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Paavo","family":"Alku","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref73","first-page":"2266","article-title":"maximum likelihood voice conversion based on gmm with straight mixed excitation","author":"ohtani","year":"2006","journal-title":"Proc INTERSPEECH"},{"key":"ref72","article-title":"aperiodicity extraction and control using mixed mode excitation and group delay manipulation for a high quality speech analysis, modification and synthesis system straight","author":"kawahara","year":"2001","journal-title":"Proc 2nd Int Workshop Models Anal Vocal Emissions for Biomed Applicat (MAVEBA)"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1983.1172250"},{"key":"ref70","doi-asserted-by":"crossref","first-page":"93","DOI":"10.21437\/Interspeech.2005-76","article-title":"an overview of nitech hmm-based speech synthesis system for blizzard challenge 2005","author":"zen","year":"2005","journal-title":"Proc INTERSPEECH"},{"key":"ref76","first-page":"495","author":"talkin","year":"1995","journal-title":"Speech Coding and Synthesis"},{"journal-title":"ESPS Programs Version 5 0","year":"1993","key":"ref77"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1121\/1.412276"},{"key":"ref74","first-page":"2812","article-title":"robust f0 estimation of speech signal using harmonicity measure based on instantaneous frequency","volume":"e87 d","author":"arifianto","year":"2004","journal-title":"IEICE Trans Inf Syst"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.857807"},{"key":"ref75","doi-asserted-by":"crossref","first-page":"2781","DOI":"10.21437\/Eurospeech.1999-696","article-title":"Fixed point analysis of frequency to instantaneous frequency mapping for accurate estimation of F0 and periodicity","author":"kawahara","year":"1999","journal-title":"Proc EUROSPEECH"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(90)90021-Z"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225953"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1979.1163260"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(92)90005-R"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-01562-9"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1121\/1.1907771"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2005.01.007"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1121\/1.1379076"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/89.784109"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1121\/1.1903487"},{"key":"ref60","first-page":"455","article-title":"multi-space probability distribution hmm","volume":"e85 d","author":"tokuda","year":"2002","journal-title":"IEICE Trans Inf Syst"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-d.5.825"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1016\/S0885-2308(86)80009-2"},{"journal-title":"Hidden Markov model based finnish text-to-speech system utilizing glottal inverse filtering","year":"2008","author":"raitio","key":"ref28"},{"key":"ref63","first-page":"309","article-title":"accent and prominence in finnish speech synthesis","author":"vainio","year":"2005","journal-title":"Proc 10th Int Conf Speech Comput (Specom 2005)"},{"key":"ref27","article-title":"the blizzard challenge 2008","author":"karaiskos","year":"2008","journal-title":"Proc Blizzard Challenge Workshop"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87391-4_68"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1250\/ast.21.79"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"1881","DOI":"10.21437\/Interspeech.2008-189","article-title":"hmm-based finnish text-to-speech system utilizing glottal inverse filtering","author":"raitio","year":"2008","journal-title":"Proc INTERSPEECH"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1121\/1.2151809"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-80043-6"},{"journal-title":"GNU Scientific Library Reference Manual","year":"2009","author":"galassi","key":"ref68"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"757","DOI":"10.21437\/Eurospeech.1995-173","article-title":"an algorithm for speech parameter generation from continuous mixture hmms with dynamic features","volume":"1","author":"tokuda","year":"1995","journal-title":"Proc EUROSPEECH"},{"key":"ref69","article-title":"ustc system for blizzard challenge 2006: an improved hmm-based speech synthesis method","author":"ling","year":"2006","journal-title":"Proc Blizzard Challenge Workshop"},{"key":"ref1","first-page":"2374","article-title":"simultaneous modeling of spectrum, pitch and duration in hmm-based speech synthesis","author":"yoshimura","year":"1999","journal-title":"Proc EUROSPEECH"},{"key":"ref20","first-page":"113","article-title":"towards an improved modeling of the glottal source in statistical parametric speech synthesis","author":"cabral","year":"2007","journal-title":"Proc 6th ISCA Workshop Speech Synth"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TAU.1973.1162466"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"1829","DOI":"10.21437\/Interspeech.2008-176","article-title":"glottal spectral separation for parametric speech synthesis","author":"cabral","year":"2008","journal-title":"Proc INTERSPEECH"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/S1388-2457(99)00088-7"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1991.150452"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-1.1.325"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(98)00085-5"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.3109\/14015439709075330"},{"key":"ref51","first-page":"294","article-title":"the hmm-based speech synthesis system (hts) version 2.0","author":"zen","year":"2007","journal-title":"Proc 6th ISCA Workshop Speech Synth"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1121\/1.426901"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1977.1162905"},{"journal-title":"Speech Coding and Synthesis","year":"1995","author":"paliwal","key":"ref57"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1984.1172448"},{"key":"ref55","article-title":"an investigation of spectral parameters for hmm-based speech synthesis","author":"marume","year":"2006","journal-title":"Proc Autumn Meeting Acoust Soc Jpn"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1984.1172448"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(83)90064-X"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1121\/1.395275"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1121\/1.398894"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1986.1164909"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1121\/1.402044"},{"key":"ref12","first-page":"40","article-title":"the government standard linear predictive coding algorithm: lpc-10","volume":"1","author":"tremain","year":"1982","journal-title":"Speech Technol"},{"journal-title":"Digital Processing of Speech Signals","year":"1978","author":"rabiner","key":"ref13"},{"key":"ref14","first-page":"2259","article-title":"mixed excitation for hmm-based speech synthesis","author":"yoshimura","year":"2001","journal-title":"Proc EUROSPEECH"},{"key":"ref15","article-title":"an excitation model for hmm-based speech synthesis based on residual modeling","author":"maia","year":"2007","journal-title":"Proc 6th ISCA Workshop Speech Synth"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-1.1.378"},{"journal-title":"Methods for subjective determination of transmission quality","year":"1996","key":"ref82"},{"key":"ref17","first-page":"1","article-title":"a four-parameter model of glottal flow","volume":"4","author":"fant","year":"1985","journal-title":"STL-QPSR"},{"journal-title":"Artificial neural network based prosody models for Finnish text-to-speech synthesis","year":"2001","author":"vainio","key":"ref81"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.2307\/3001968"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1989.266405"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(96)00026-X"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(91)90051-T"},{"journal-title":"HMM-Based Speech Synthesis System","year":"2009","key":"ref80"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.04.004"},{"key":"ref3","first-page":"227","article-title":"an hmm-based speech synthesis system applied to english","author":"tokuda","year":"2002","journal-title":"Proc IEEE Workshop Speech Synth"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e88-d.11.2484"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2008.2006647"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-d.5.816"},{"key":"ref85","doi-asserted-by":"crossref","first-page":"573","DOI":"10.21437\/Interspeech.2008-169","article-title":"articulatory control of hmm-based parametric speech synthesis driven by phonetic knowledge","author":"ling","year":"2008","journal-title":"Proc INTERSPEECH"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(02)00082-1"},{"key":"ref7","first-page":"889","article-title":"minimum generation error training for hmm-based speech synthesis","volume":"1","author":"wu","year":"2006","journal-title":"Proc ICASSP"},{"journal-title":"Acoustic Theory of Speech Production","year":"1960","author":"fant","key":"ref9"},{"key":"ref46","first-page":"717","article-title":"Performance of glottal inverse filtering as tested by aeroelastic modelling of phonation and FE modelling of vocal tract","volume":"92","author":"alku","year":"2006","journal-title":"Acta Acust United with Acust"},{"key":"ref45","first-page":"139","author":"walker","year":"2005","journal-title":"Nonlinear Analyses and Algorithms for Speech Processing"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1121\/1.1490365"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1159\/000089611"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/78.80824"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(92)90011-U"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1985.1164544"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1975.9792"}],"container-title":["IEEE Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/10376\/5594819\/05428849.pdf?arnumber=5428849","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,19]],"date-time":"2025-02-19T12:13:31Z","timestamp":1739967211000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5428849\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,1]]},"references-count":85,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tasl.2010.2045239","relation":{},"ISSN":["1558-7916","1558-7924"],"issn-type":[{"type":"print","value":"1558-7916"},{"type":"electronic","value":"1558-7924"}],"subject":[],"published":{"date-parts":[[2011,1]]}}}