{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T18:51:05Z","timestamp":1774551065762,"version":"3.50.1"},"reference-count":49,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2016,6,1]],"date-time":"2016-06-01T00:00:00Z","timestamp":1464739200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100003711","name":"Ministry of Science and Technology","doi-asserted-by":"publisher","award":["MOST102-2221-E-006-094-MY3"],"award-info":[{"award-number":["MOST102-2221-E-006-094-MY3"]}],"id":[{"id":"10.13039\/501100003711","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004844","name":"National Cheng Kung University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004844","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002701","name":"Ministry of Education","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002701","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2016,6]]},"DOI":"10.1109\/taslp.2016.2537982","type":"journal-article","created":{"date-parts":[[2016,3,3]],"date-time":"2016-03-03T19:14:29Z","timestamp":1457032469000},"page":"1052-1065","source":"Crossref","is-referenced-by-count":2,"title":["Candidate Expansion and Prosody Adjustment for Natural Speech Synthesis Using a Small Corpus"],"prefix":"10.1109","volume":"24","author":[{"given":"Yan-You","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chung-Hsien","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi-Chin","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shih-Lun","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jhing-Fa","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6289009"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339738"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2003.1198737"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(01)00020-6"},{"key":"ref31","first-page":"79","article-title":"Moving beyond the &#x2018;beads-on-a-string&#x2019; model of speech","author":"ostendorf","year":"0","journal-title":"Proc IEEE ASRU Workshop"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2000.0148"},{"key":"ref37","article-title":"The NICO artificial neural network toolkit","author":"strom","year":"1997"},{"key":"ref36","first-page":"2390","article-title":"A penalized logistic regression approach to detection based phone classification","author":"siniscalchi","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4518596"},{"key":"ref34","first-page":"2350","article-title":"A study on detection based automatic speech recognition","author":"ma","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref28","first-page":"303","article-title":"Residual compensation based on articulatory feature-based phone clustering for hybrid Mandarin speech synthesis","author":"huang","year":"0","journal-title":"Proc ISCA Speech Synth Workshop"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2213247"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853596"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.541110"},{"key":"ref1","first-page":"581","article-title":"Optimising selection of units from speech databases for concatenative synthesis","volume":"1","author":"black","year":"0","journal-title":"Proc Eur Conf Speech Commun Technol (Eurospeech)"},{"key":"ref20","first-page":"2055","article-title":"Analysis of speaker similarity in the statistical speech synthesis systems using a hybrid approach","author":"guner","year":"0","journal-title":"Proc Eur Signal Process Conf (EUSIPCO)"},{"key":"ref22","article-title":"The USTC system for Blizzard Challenge 2012","author":"ling","year":"0","journal-title":"Proc Blizzard Challenge Workshop"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367302"},{"key":"ref24","article-title":"The USTC system for Blizzard Challenge 2014","author":"chen","year":"0","journal-title":"Proc Blizzard Challenge Workshop"},{"key":"ref23","article-title":"The USTC system for Blizzard Challenge 2013","author":"chen","year":"0","journal-title":"Proc Blizzard Challenge Workshop"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2009.4960453"},{"key":"ref25","first-page":"333","article-title":"A hybrid TTS approach for prosody and acoustic modules","author":"sainz","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2000.861820"},{"key":"ref11","first-page":"227","article-title":"An HMM-based speech synthesis system applied to English","author":"tokuda","year":"0","journal-title":"Proc IEEE Workshop Speech Synth"},{"key":"ref40","first-page":"610","article-title":"Mel-generalized log spectral approximation filter","volume":"j68 a","author":"kobayashi","year":"1985","journal-title":"IEICE Trans Fund"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"1109","DOI":"10.1109\/TASL.2006.876112","article-title":"Voice conversion using duration-embedded bi-HMMs for expressive speech synthesis","volume":"14","author":"wu","year":"2006","journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-d.5.816"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367298"},{"key":"ref15","first-page":"294","article-title":"The HMM-based speech synthesis system version 2.0","author":"zen","year":"0","journal-title":"Proc ISCA Speech Synth Workshop"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.04.004"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2034771"},{"key":"ref18","first-page":"1825","article-title":"Synthesis by generation and concatenation of multiform segments","author":"pollet","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2089679"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(00)00075-3"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.941031"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"941","DOI":"10.1016\/j.specom.2005.12.004","article-title":"A method for combining intonation modeling and speech unit selection in corpus-based speech synthesis systems","volume":"48","author":"d\u00edaz","year":"2006","journal-title":"Speech Commun"},{"key":"ref5","first-page":"81","article-title":"A probabilistic approach to unit selection for corpus-based speech synthesis","author":"sakai","year":"0","journal-title":"Proc Eur Conf Speech Commun Technol (Interspeech)"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","article-title":"Maximum likelihood from incomplete data via the EM algorithm","volume":"39","author":"dempster","year":"1977","journal-title":"J Roy Stat Soc Ser B"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.889752"},{"key":"ref49","first-page":"1043","article-title":"Mel-generalized cepstral analysis&#x2014;A unified approach to speech spectral estimation","author":"tokuda","year":"0","journal-title":"Proceedings of ICSLP &#x2018;96"},{"key":"ref9","first-page":"2347","article-title":"Simultaneous modeling of spectrum, pitch and duration in HMM-based speech synthesis","volume":"5","author":"yoshimura","year":"0","journal-title":"Proc Eur Conf Speech Commun Technol (Eurospeech)"},{"key":"ref46","first-page":"94","article-title":"TH-CoSS, a Mandarin speech corpus for TTS","volume":"21","author":"cai","year":"2007","journal-title":"J Chin Inf Process"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2040791"},{"key":"ref48","first-page":"341","article-title":"Praat, a system for doing phonetics by computer","volume":"5","author":"boersma","year":"0","journal-title":"Glot Int"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(98)00085-5"},{"key":"ref42","first-page":"89","article-title":"Tone-vowel interaction in standard Chinese","author":"hoole","year":"0","journal-title":"Proc Int Symp Tonal Aspects Lang Emphasis Tone Lang (TAL)"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225953"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/26.61370"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/89.668817"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/7463555\/07425169.pdf?arnumber=7425169","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,14]],"date-time":"2024-06-14T19:22:10Z","timestamp":1718392930000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7425169\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6]]},"references-count":49,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/taslp.2016.2537982","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,6]]}}}