{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,4,28]],"date-time":"2024-04-28T13:40:05Z","timestamp":1714311605308},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2012,8,16]],"date-time":"2012-08-16T00:00:00Z","timestamp":1345075200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/2.0"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J AUDIO SPEECH MUSIC PROC."],"published-print":{"date-parts":[[2012,12]]},"DOI":"10.1186\/1687-4722-2012-21","type":"journal-article","created":{"date-parts":[[2012,8,16]],"date-time":"2012-08-16T12:14:39Z","timestamp":1345119279000},"source":"Crossref","is-referenced-by-count":1,"title":["Speaker-dependent model interpolation for statistical emotional speech synthesis"],"prefix":"10.1186","volume":"2012","author":[{"given":"Chih-Yu","family":"Hsu","sequence":"first","affiliation":[]},{"given":"Chia-Ping","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,8,16]]},"reference":[{"key":"60_CR1","first-page":"373","volume-title":"Unit selection in a concatenative speechsynthesis system using a large speech database","author":"AJ Hunt","year":"1996","unstructured":"Hunt AJ, Black AW: Unit selection in a concatenative speech synthesis system using a large speech database. Proceedings of International Conference on Acoustics, Speech and Signal Processing, 1996, pp. 373\u2013376"},{"key":"60_CR2","doi-asserted-by":"crossref","first-page":"69","DOI":"10.2498\/cit.1001049","volume":"16","author":"JZ Gros","year":"2008","unstructured":"Gros JZ, Zganec M: An efficient unit-selection method for concatenative text-to-speech synthesis systems. J. Comput. Inf. Technol 2008, 16: 69-78.","journal-title":"J. Comput. Inf. Technol"},{"key":"60_CR3","first-page":"2347","volume-title":"Simultaneous modeling of spectrum, pitch and duration inHMM-based speech synthesis","author":"T Yoshimura","year":"1999","unstructured":"Yoshimura T, Tokuda K, Masuko T, Kobayashi T, Kitamura T: Simultaneous modeling of spectrum, pitch and duration in HMM-based speech synthesis. Proceedings of Eurospeech, 1999, pp. 2347\u20132350"},{"key":"60_CR4","first-page":"1229","volume-title":"Statistical parametric speechsynthesis","author":"H Zen","year":"2007","unstructured":"Zen H, Tokuda K, Black AW: Statistical parametric speech synthesis. Proceedings of International Conference on Acoustics, Speech and Signal Processing, 2007, pp. 1229\u20131232"},{"key":"60_CR5","volume-title":"Personalising speech-to-speech translation in theEMIME project","author":"M Kurimo","year":"2010","unstructured":"Kurimo M, Byrne W, Dines J, Garner PN, Gibson M, Guan Y, Hirsim\u00e4ki T, Karhila R, King S, Liang H, Oura K, Saheer L, Shannon M, Shiota S, Tian J, Tokuda K, Wester M, Wu YJ, Yamagishi J: Personalising speech-to-speech translation in the EMIME project. Proceedings of the ACL 2010 System Demonstrations, Uppsala, Sweden, 2010"},{"key":"60_CR6","volume-title":"Speaker adaptation and the evaluation of speaker similarity inthe EMIME speech-to-speech translation project","author":"M Wester","year":"2010","unstructured":"Wester M, Dines J, Gibson M, Liang H, Wu YJ, Saheer L, King S, Oura K, Garner PN, Byrne W, Guan Y, Hirsim\u00e4ki T, Karhila R, Shannon M, Shiota S, Tian J, Tokuda K, Yamagishi J: Speaker adaptation and the evaluation of speaker similarity in the EMIME speech-to-speech translation project. Proceedings of 7th ISCA Speech Synthesis Workshop, Kyoto, Japan, 2010"},{"key":"60_CR7","unstructured":"Creer S, Green P, Cunningham S, Yamagishi J: Building personalised synthesised voices for individuals with dysarthria using the HTS toolkit. Computer Synthesized Speech Technologies: Tools for Aiding Impairment, January 2010"},{"issue":"4","key":"60_CR8","doi-asserted-by":"publisher","first-page":"1145","DOI":"10.1109\/TASL.2006.876113","volume":"14","author":"J Tao","year":"2006","unstructured":"Tao J, Kang Y, Li A: Prosody conversion from neutral speech to emotional speech. IEEE Trans. Audio Speech Lang. Process 2006, 14(4):1145-1154.","journal-title":"IEEE Trans. Audio Speech Lang. Process"},{"issue":"4","key":"60_CR9","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.1109\/TASL.2006.876112","volume":"14","author":"CH Wu","year":"2006","unstructured":"Wu CH, Hsia CC, Liu TH, Wang JF: Voice conversion using duration-embedded bi-HMMs for expressive speech synthesis. IEEE Trans. Audio Speech Lang. Process 2006, 14(4):1109-1116.","journal-title":"IEEE Trans. Audio Speech Lang. Process"},{"key":"60_CR10","first-page":"2461","volume-title":"Modeling of various speaking styles and emotions for HMM-basedspeech synthesis","author":"J Yamagishi","year":"2003","unstructured":"Yamagishi J, Onishi K, Masuko T, Kobayashi T: Modeling of various speaking styles and emotions for HMM-based speech synthesis. Proceedings of Eurospeech, 2003, pp. 2461\u20132464"},{"issue":"3","key":"60_CR11","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1093\/ietisy\/e88-d.3.502","volume":"E88-D","author":"J Yamagishi","year":"2005","unstructured":"Yamagishi J, Onishi K, Masuko T, Kobayashi T: Acoustic modeling of speaking styles and emotional expressions in HMM-based speech synthesis. IEICE Trans. Inf. Syst 2005, E88-D(3):502-509. 10.1093\/ietisy\/e88-d.3.502","journal-title":"IEICE Trans. Inf. Syst"},{"key":"60_CR12","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1109\/TASL.2008.2006647","volume":"17","author":"J Yamagishi","year":"2009","unstructured":"Yamagishi J, Kobayashi T, Nakano Y, Ogata K, Isogai J: Analysis of speaker adaptation algorithms for HMM-based speech synthesis and a constrained SMAPLR adaptation algorithm. IEEE Trans. Audio Speech Lang. Process 2009, 17: 66-83.","journal-title":"IEEE Trans. Audio Speech Lang. Process"},{"issue":"9","key":"60_CR13","doi-asserted-by":"publisher","first-page":"1406","DOI":"10.1093\/ietisy\/e90-d.9.1406","volume":"E90-D","author":"T Nose","year":"2007","unstructured":"Nose T, Yamagishi J, Masuko T, Kobayashi T: A style control technique for HMM-based expressive speech synthesis. IEICE Trans. Inf. Syst 2007, E90-D(9):1406-1413. 10.1093\/ietisy\/e90-d.9.1406","journal-title":"IEICE Trans. Inf. Syst"},{"key":"60_CR14","first-page":"4633","volume-title":"Speaker and styleadaptation using average voice model for style control in HMM-basedspeech synthesis","author":"M Tachibana","year":"2008","unstructured":"Tachibana M, Izawa S, Nose T, Kobayashi T: Speaker and style adaptation using average voice model for style control in HMM-based speech synthesis. ICASSP \u201908, 2008, pp. 4633\u20134636"},{"issue":"3","key":"60_CR15","doi-asserted-by":"publisher","first-page":"489","DOI":"10.1587\/transinf.E92.D.489","volume":"E92-D","author":"T Nose","year":"2009","unstructured":"Nose T, Tachibana M, Kobayashi T: HMM-based style control for expressive speech synthesis with arbitrary speaker\u2019s voice using model adaptation. IEICE Trans. Inf. Syst 2009, E92-D(3):489-497. 10.1587\/transinf.E92.D.489","journal-title":"IEICE Trans. Inf. Syst"},{"key":"60_CR16","first-page":"281","volume-title":"Prosody analysis and modeling for emotional speech synthesis","author":"DN Jiang","year":"2005","unstructured":"Jiang DN, Zhang W, Shen LQ, Cai LH: Prosody analysis and modeling for emotional speech synthesis. Proceedings of IEEE International Conference on Acoustics, Speech, and Signal Processing 2005 (ICASSP \u201905), 2005, pp. 281\u2013284"},{"issue":"11","key":"60_CR17","doi-asserted-by":"publisher","first-page":"2484","DOI":"10.1093\/ietisy\/e88-d.11.2484","volume":"E88-D","author":"M Tachibana","year":"2005","unstructured":"Tachibana M, Yamagishi J, Masuko T, Kobayashi T: Speech synthesis with various emotional expressions and speaking styles by style interpolation and morphing. IEICE Trans. Inf. Syst 2005, E88-D(11):2484-2491. 10.1093\/ietisy\/e88-d.11.2484","journal-title":"IEICE Trans. Inf. Syst"},{"key":"60_CR18","volume-title":"Speaker interpolation in HMM-based speech synthesissystem","author":"T Yoshimura","year":"1997","unstructured":"Yoshimura T, Masuko T, Tokuda K, Kobayashi T, Kitamura T: Speaker interpolation in HMM-based speech synthesis system. Fifth European Conference on Speech Communication and Technology, EUROSPEECH 1997, Rhodes, Greece, September 22\u201325, 1997"},{"key":"60_CR19","doi-asserted-by":"crossref","unstructured":"Govind D, Prasanna SRM, Yegnanarayana B: Neutral to target emotion conversion using source and suprasegmental information. INTERSPEECH, ISCA, 2011, pp. 2969\u20132972","DOI":"10.21437\/Interspeech.2011-743"},{"issue":"2","key":"60_CR20","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner LR: A tutorial on hidden Markov models and selected applications in speech recognition. Proc. IEEE 1989, 77(2):257-286. 10.1109\/5.18626","journal-title":"Proc. IEEE"},{"issue":"3","key":"60_CR21","first-page":"455","volume":"E85-D","author":"T Tokuda","year":"2002","unstructured":"Tokuda T, Masuko T, Miyazaki N, Kobayashi T: Multi-space probability distribution HMM. IEICE Trans. Inf. Syst 2002, E85-D(3):455-464.","journal-title":"IEICE Trans. Inf. Syst"},{"key":"60_CR22","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster AP, Laird NM, Rubin DB: Maximum likelihood from incomplete data via the EM algorithm. J. R. Stat. Soc. Ser. B 1977, 39: 1-38.","journal-title":"J. R. Stat. Soc. Ser. B"},{"key":"60_CR23","unstructured":"Bilmes JA: A gentle tutorial of the EM algorithm and its application to parameter estimation for gaussian mixture and hidden Markov models. Technical report, International Computer Science Institute, University of California at Berkeley TR-97-021, April 1998"},{"key":"60_CR24","first-page":"229","volume-title":"HiddenMarkov models based on multi-space probability distribution for pitchpattern modeling","author":"K Tokuda","year":"1999","unstructured":"Tokuda K, Masuko T, Miyazaki N, Kobayashi T: Hidden Markov models based on multi-space probability distribution for pitch pattern modeling. Proceedings of International Conference on Acoustics, Speech and Signal Processing, 1999, pp. 229\u2013232"},{"key":"60_CR25","first-page":"1043","volume-title":"Mel-generalized cepstralanalysis","author":"K Tokuda","year":"1994","unstructured":"Tokuda K, Kobayashi T, Masuko T, Imai S: Mel-generalized cepstral analysis. Proceedings of International Conference on Spoken Language Processing, 1994, pp. 1043\u20131046"},{"issue":"6","key":"60_CR26","first-page":"610","volume":"J68-A","author":"T Kobayashi","year":"1985","unstructured":"Kobayashi T, Imai S, Fukuda T: Mel-generalized log spectral approximation filter. IEICE Trans. Fund 1985, J68-A(6):610-611.","journal-title":"IEICE Trans. Fund"},{"key":"60_CR27","first-page":"901","volume-title":"Segmental tonal modeling for phone set design in, Mandarin LVCSR","author":"C Huang","year":"2004","unstructured":"Huang C, Shi Y, Zhou J, Chu M, Wang T, Chang E: Segmental tonal modeling for phone set design in, Mandarin LVCSR. Proceedings of International Conference on Acoustics, Speech and Signal Processing, 2004, pp. 901\u2013904"},{"key":"60_CR28","volume-title":"An Example of Context-dependent label format for HMM-based Speech synthesis in English","author":"H Zen","year":"2006","unstructured":"Zen H: An Example of Context-dependent label format for HMM-based Speech synthesis in English. 2006. https:\/\/wiki.inf.ed.ac.uk\/twiki\/pub\/CSTR\/F0parametrisation\/hts_lab_format.pdf"},{"issue":"4","key":"60_CR29","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1250\/ast.21.199","volume":"21","author":"T Yoshimura","year":"2000","unstructured":"Yoshimura T, Tokuda K, Masuko T, Kobayashi T, Kitamura T: Speaker interpolation for HMM-based speech synthesis system. J. Acoust. Soc. Jpn 2000, 21(4):199-206. 10.1250\/ast.21.199","journal-title":"J. Acoust. Soc. Jpn"},{"issue":"2","key":"60_CR30","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1016\/j.specom.2009.09.004","volume":"52","author":"M Pucher","year":"2010","unstructured":"Pucher M, Schabus D, Yamagishi J, Neubarth F, Strom V: Modeling and interpolation of Austrian German and Viennese dialect in HMM-based speech synthesis. Speech Commun 2010, 52(2):164-179. 10.1016\/j.specom.2009.09.004","journal-title":"Speech Commun"},{"key":"60_CR31","volume-title":"Proceedings of the Sixth ISCA Workshop on Speech Synthesis (SSW6),","author":"M Fraser","year":"2007","unstructured":"Fraser M, King S: The Blizzard challenge 2007. Proceedings of the Sixth ISCA Workshop on Speech Synthesis (SSW6), 2007."}],"container-title":["EURASIP Journal on Audio, Speech, and Music Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1186\/1687-4722-2012-21\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1687-4722-2012-21.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/1687-4722-2012-21.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,28]],"date-time":"2024-04-28T13:04:17Z","timestamp":1714309457000},"score":1,"resource":{"primary":{"URL":"https:\/\/asmp-eurasipjournals.springeropen.com\/articles\/10.1186\/1687-4722-2012-21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,8,16]]},"references-count":31,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2012,12]]}},"alternative-id":["60"],"URL":"https:\/\/doi.org\/10.1186\/1687-4722-2012-21","relation":{},"ISSN":["1687-4722"],"issn-type":[{"value":"1687-4722","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,8,16]]},"article-number":"21"}}