{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T21:34:08Z","timestamp":1760132048829,"version":"3.37.0"},"reference-count":40,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2009,8,1]],"date-time":"2009-08-01T00:00:00Z","timestamp":1249084800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2009,8]]},"DOI":"10.1109\/tasl.2009.2014796","type":"journal-article","created":{"date-parts":[[2009,6,25]],"date-time":"2009-06-25T14:57:12Z","timestamp":1245941832000},"page":"1171-1185","source":"Crossref","is-referenced-by-count":61,"title":["Integrating Articulatory Features Into HMM-Based Parametric Speech Synthesis"],"prefix":"10.1109","volume":"17","author":[{"given":"Zhen-Hua","family":"Ling","sequence":"first","affiliation":[]},{"given":"Korin","family":"Richmond","sequence":"additional","affiliation":[]},{"given":"Junichi","family":"Yamagishi","sequence":"additional","affiliation":[]},{"given":"Ren-Hua","family":"Wang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"2469","article-title":"a comparison of acoustic features for articulatory inversion","author":"qin","year":"2007","journal-title":"Proc INTERSPEECH"},{"journal-title":"Dual rate speech coder for multimedia communications transmitting at 5 3 and 6 3 kbit\/s","year":"1996","key":"ref38"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"823","DOI":"10.21437\/Eurospeech.1999-213","article-title":"synthesis of regional english using a keyword lexicon","volume":"2","author":"fitt","year":"1999","journal-title":"Proc EUROSPEECH"},{"journal-title":"Research on HMM-Based Speech Synthesis","year":"2006","author":"wu","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2007.09.001"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-77347-4_23"},{"key":"ref37","first-page":"294","article-title":"the hmm-based speech synthesis system (hts) version 2.0","author":"zen","year":"2007","journal-title":"Proc 5th ISCA Speech Synthesis Workshop"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(98)00085-5"},{"journal-title":"The HTK Book (for HTK Version 3 2)","year":"2002","author":"young","key":"ref35"},{"key":"ref34","first-page":"147","article-title":"the architecture of the festival speech synthesis system","author":"taylor","year":"1998","journal-title":"3rd ESCA Speech Synthesis Workshop"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-d.9.1406"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1121\/1.416001"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(86)90003-8"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/0093-934X(87)90058-7"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/0730-725X(87)90477-2"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1998.698623"},{"key":"ref15","first-page":"3","author":"summerfield","year":"1987","journal-title":"Hearing by Eye The Psychology of Lipreading"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2005.07.003"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.861883"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1121\/1.2404622"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2003.822636"},{"key":"ref28","doi-asserted-by":"crossref","first-page":"2517","DOI":"10.21437\/Eurospeech.2003-690","article-title":"cross-stream observation dependencies for multi-stream speech recognition","author":"cetin","year":"2003","journal-title":"Proc EUROSPEECH"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-1.1.325"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177697196"},{"journal-title":"Text to Speech Synthesis New Paradigms and Advances","year":"2004","author":"tokuda","key":"ref3"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"533","DOI":"10.1093\/ietisy\/e90-d.2.533","article-title":"average-voice-based speech synthesis using hsmm-based speaker adaptation and adaptive training","volume":"e90 d","author":"yamagishi","year":"2007","journal-title":"IEICE Trans Inf Syst"},{"key":"ref29","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","article-title":"maximum likelihood from incomplete data via the em algorithm","volume":"39","author":"dempster","year":"1977","journal-title":"J Roy Statist Soc"},{"key":"ref5","article-title":"ustc system for blizzard challenge 2006: an improved hmm-based speech synthesis method","author":"ling","year":"2006","journal-title":"Blizzard Challenge Workshop"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e88-d.3.502"},{"key":"ref7","first-page":"1269","article-title":"eigenvoices for hmm-based speech synthesis","author":"shichiri","year":"2002","journal-title":"Proc ICSLP"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.861820"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e88-d.11.2484"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"2347","DOI":"10.21437\/Eurospeech.1999-596","article-title":"simultaneous modeling of spectrum, pitch and duration in hmm-based speech synthesis","author":"yoshimura","year":"1999","journal-title":"Proc EUROSPEECH"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1659965"},{"key":"ref22","first-page":"89","article-title":"minimum generation error training for hmm-based speech synthesis","author":"wu","year":"2006","journal-title":"Proc ICASSP"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1980.1171078"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1250\/ast.21.79"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1999.758104"},{"key":"ref26","first-page":"534","article-title":"a context clustering technique for average voice models","volume":"e86 d","author":"yamagishi","year":"2003","journal-title":"IEICE Trans Inf Syst"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2008.2006647"}],"container-title":["IEEE Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/10376\/5109758\/05109768.pdf?arnumber=5109768","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,10]],"date-time":"2025-02-10T10:44:35Z","timestamp":1739184275000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5109768\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,8]]},"references-count":40,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tasl.2009.2014796","relation":{},"ISSN":["1558-7916"],"issn-type":[{"type":"print","value":"1558-7916"}],"subject":[],"published":{"date-parts":[[2009,8]]}}}