{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T10:34:12Z","timestamp":1704969252713},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2015,1,30]],"date-time":"2015-01-30T00:00:00Z","timestamp":1422576000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2015,8]]},"DOI":"10.1007\/s00034-015-9977-8","type":"journal-article","created":{"date-parts":[[2015,1,29]],"date-time":"2015-01-29T10:01:00Z","timestamp":1422525660000},"page":"2597-2619","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Robust Voicing Detection and $$F_{0}$$ F 0 Estimation for HMM-Based Speech Synthesis"],"prefix":"10.1007","volume":"34","author":[{"given":"N. P.","family":"Narendra","sequence":"first","affiliation":[]},{"given":"K. Sreenivasa","family":"Rao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,1,30]]},"reference":[{"issue":"2","key":"9977_CR1","doi-asserted-by":"crossref","first-page":"701","DOI":"10.1121\/1.1490365","volume":"112","author":"P Alku","year":"2002","unstructured":"P. Alku, T. Bakstrom, E. Vikman, Normalized amplitude quotient for parameterization of the glottal flow. J. Acoust. Soc. Am. 112(2), 701\u2013710 (2002)","journal-title":"J. Acoust. Soc. Am."},{"issue":"12","key":"9977_CR2","first-page":"2812","volume":"E87\u2013D","author":"D Arifianto","year":"2004","unstructured":"D. Arifianto, T. Tanaka, T. Masuko, T. Kobayashi, Robust F0 estimation of speech signal using harmonicity measure based on instantaneous frequency. IEICE Trans. Inf. Syst. E87\u2013D(12), 2812\u20132820 (2004)","journal-title":"IEICE Trans. Inf. Syst."},{"key":"9977_CR3","unstructured":"P. Bagshaw, S. M. Hiller, M. A. Jack, Enhanced Pitch Tracking and the Processing of FQ Contours for Computer and Intonation Teaching, in Proceedings of Eurospeech, (1993), pp. 1003\u20131006"},{"issue":"6","key":"9977_CR4","doi-asserted-by":"crossref","first-page":"782","DOI":"10.1016\/j.specom.2013.02.007","volume":"55","author":"Y Bayya","year":"2013","unstructured":"Y. Bayya, D.N. Gowda, Spectro-temporal analysis of speech signals using zero-time windowing and group delay function. Speech Commun. 55(6), 782\u2013795 (2013)","journal-title":"Speech Commun."},{"key":"9977_CR5","unstructured":"P. Boersma, Accurate Short-Term Analysis of Fundamental Frequency and the Harmonics-To-Noise Ratio of a Sampled Sound, in Proceedings of the Institute of Phonetic Sciences, vol. 17 (1993), p. 97\u2013110"},{"key":"9977_CR6","unstructured":"T. Drugman, A. Alwan, Joint Robust Voicing Detection and Pitch Estimation Based on Residual Harmonics, in Proceedings of Interspeech, (2011), pp. 1973\u20131976"},{"issue":"4","key":"9977_CR7","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1250\/ast.5.233","volume":"5","author":"H Fujisaki","year":"1984","unstructured":"H. Fujisaki, K. Hirose, Analysis of voice fundamental frequency contours for declarative sentences of japanese. J. Acoust. Soc. Jpn. E 5(4), 233\u2013242 (1984)","journal-title":"J. Acoust. Soc. Jpn. E"},{"key":"9977_CR8","doi-asserted-by":"crossref","DOI":"10.1201\/9781420036824","volume-title":"A Practical Handbook of Speech Coders","author":"R Goldberg","year":"2000","unstructured":"R. Goldberg, L. Riek, A Practical Handbook of Speech Coders (CRC, Boca Raton, 2000)"},{"issue":"1","key":"9977_CR9","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1121\/1.396427","volume":"83","author":"DJ Hermes","year":"1988","unstructured":"D.J. Hermes, Measurement of pitch by subharmonic summation. J. Acoust. Soci. Am. 83(1), 257\u2013264 (1988)","journal-title":"J. Acoust. Soci. Am."},{"key":"9977_CR10","unstructured":"HMM-based speech synthesis system (HTS). http:\/\/hts.sp.nitech.ac.jp\/ . Accessed 20 Feb 2014"},{"key":"9977_CR11","unstructured":"H. Kawahara, H. Katayose, A. de Cheveigne, R. Patterson, Fixed Point Analysis of Frequency to Instantaneous Frequency Mapping for Accurate Estimation of f0 and Periodicity, in Proceedings of Eurospeech, vol. 6 (1999), pp. 2781\u20132784"},{"issue":"8","key":"9977_CR12","doi-asserted-by":"crossref","first-page":"1602","DOI":"10.1109\/TASL.2008.2004526","volume":"16","author":"KSR Murty","year":"2008","unstructured":"K.S.R. Murty, B. Yegnanarayana, Epoch extraction from speech signals. IEEE Trans. Audio Speech Lang. Process. 16(8), 1602\u20131613 (2008)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"6","key":"9977_CR13","doi-asserted-by":"crossref","first-page":"469","DOI":"10.1109\/LSP.2009.2016829","volume":"16","author":"KSR Murty","year":"2009","unstructured":"K.S.R. Murty, B. Yegnanarayana, M.A. Joseph, Characterization of glottal activity from speech signals. IEEE Signal Process. Lett. 16(6), 469\u2013472 (2009)","journal-title":"IEEE Signal Process. Lett."},{"issue":"3","key":"9977_CR14","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1007\/s10772-011-9094-4","volume":"14","author":"NP Narendra","year":"2011","unstructured":"N.P. Narendra, K.S. Rao, K. Ghosh, R.R. Vempada, S. Maity, Development of syllable-based text to speech synthesis system in Bengali. Int. J. Speech Technol. 14(3), 167\u2013181 (2011)","journal-title":"Int. J. Speech Technol."},{"issue":"3","key":"9977_CR15","doi-asserted-by":"crossref","first-page":"5:1","DOI":"10.1145\/2382434.2382435","volume":"9","author":"NP Narendra","year":"2012","unstructured":"N.P. Narendra, K.S. Rao, Syllable specific unit selection cost functions for text-to-speech synthesis. ACM Trans. Speech Lang. Process. 9(3), 5:1\u20135:24 (2012)","journal-title":"ACM Trans. Speech Lang. Process."},{"key":"9977_CR16","unstructured":"J. J. Odella, The use of context in large vocabulary speech recognition. PhD thesis, (Cambridge University, 1995)"},{"issue":"3","key":"9977_CR17","doi-asserted-by":"crossref","first-page":"595","DOI":"10.1587\/transinf.E93.D.595","volume":"E93\u2013D","author":"K Oura","year":"2010","unstructured":"K. Oura, H. Zen, Y. Nankaku, A. Lee, K. Tokuda, A tied covariance technique for HMM-based speech synthesis. IEICE Trans. Inf. Syst. E93\u2013D(3), 595\u2013601 (2010)","journal-title":"IEICE Trans. Inf. Syst."},{"key":"9977_CR18","unstructured":"F. Plante, G. F. Meyer, W. A. Aubsworth, A Pitch Extraction Reference Database, in Proceedings of Eurospeech, (1995), pp. 837\u2013840"},{"key":"9977_CR19","unstructured":"Y. Qian, F. Soong, M. Wang, Z. Wu, A Minimum V\/U Error Approach to F0 Generation in HMM-Based TTS, in Proceedings of Interspeech, (2009), pp. 408\u2013411"},{"key":"9977_CR20","unstructured":"K. Shichiri, A. Sawabe, K. Tokuda, T. Masuko, T. Kobayashi, T. Kitamura, Eigenvoices for HMM-Based Speech Synthesis, in Proceedings of the International Conference on Spoken Language Processing (ICSLP), (2002), pp. 1269\u20131272"},{"issue":"2","key":"9977_CR21","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1250\/ast.21.79","volume":"21","author":"K Shinoda","year":"2000","unstructured":"K. Shinoda, T. Watanabe, MDL-based context-dependent subword modeling for speech recognition. J. Acoust. Soc. Jpn. E 21(2), 79\u201386 (2000)","journal-title":"J. Acoust. Soc. Jpn. E"},{"key":"9977_CR22","unstructured":"H. Sil\u00e9n, E. Helander, J. Nurminen, M. Gabbouj, Parameterization of Vocal Fry in HMM-Based Speech Synthesis, in Proceedings of Interspeech, (2009), pp. 1775\u20131778"},{"key":"9977_CR23","unstructured":"Q. Sun, K. Hirose, W. Gu, N. Minematsu, Generation of Fundamental Frequency Contours for Mandarin Speech Synthesis Based on Tone Nucleus Model, in Proceedings of Interspeech, (2005), pp. 3265\u20133268"},{"key":"9977_CR24","volume-title":"A Robust Algorithm for Pitch Tracking (RAPT), Ch. 14","author":"D Talkin","year":"1995","unstructured":"D. Talkin, A Robust Algorithm for Pitch Tracking (RAPT), Ch. 14 (Elsevier Science, Amsterdam, 1995)"},{"key":"9977_CR25","unstructured":"M. Tamura, T. Masuko, K. Tokuda, and T. Kobayashi, Adaptation of Pitch and Spectrum for HMM-Based Speech Synthesis Using MLLR, in Proceedings of the International Conference on Acoustics, Speech, and Signal Processing, (ICASSP), vol. 2 (2001), pp. 805\u2013808"},{"issue":"5","key":"9977_CR26","doi-asserted-by":"crossref","first-page":"816","DOI":"10.1093\/ietisy\/e90-d.5.816","volume":"90","author":"T Toda","year":"2007","unstructured":"T. Toda, K. Tokuda, A speech parameter generation algorithm considering global variance for HMM-based speech synthesis. IEICE Trans. Inf. Syst. 90(5), 816\u2013824 (2007)","journal-title":"IEICE Trans. Inf. Syst."},{"key":"9977_CR27","unstructured":"K. Tokuda, T. Yoshimura, T. Masuko, T. Kobayashi, T. Kitamura, Speech Parameter Generation Algorithms for HMM-Based Speech Synthesis, in Proceedings of International Conference on Acoustics, Speech, and Signal Processing, (ICASSP), vol. 3 (2000), pp. 1315\u20131318"},{"issue":"3","key":"9977_CR28","first-page":"455","volume":"E85\u2013D","author":"K Tokuda","year":"2002","unstructured":"K. Tokuda, T. Mausko, N. Miyazaki, T. Kobayashi, Muti-space probability distribution HMM. IEICE Trans. Inf. Syst. E85\u2013D(3), 455\u2013464 (2002)","journal-title":"IEICE Trans. Inf. Syst."},{"key":"9977_CR29","unstructured":"K. Tokuda, H. Zen, A.W. Black, in Text to Speech Synthesis: New Paradigms and Advances, ed. by S. Narayanan, A. Alwan. HMM-Based Approach to Multilingual Speech Synthesis (Prentice-Hall, Upper Saddle River, 2004), pp. 135\u2013152"},{"issue":"5","key":"9977_CR30","doi-asserted-by":"crossref","first-page":"1234","DOI":"10.1109\/JPROC.2013.2251852","volume":"101","author":"K Tokuda","year":"2013","unstructured":"K. Tokuda, Y. Nankaku, T. Toda, H. Zen, J. Yamagishi, K. Oura, Speech synthesis based on hidden Markov models. Proc. IEEE 101(5), 1234\u20131252 (2013)","journal-title":"Proc. IEEE"},{"key":"9977_CR31","unstructured":"J. Yamagishi, Z. Ling, S. King, Robustness of HMM-Based Speech Synthesis, in Proceedings of Interspeech (2008), pp. 581\u2013584"},{"issue":"4","key":"9977_CR32","doi-asserted-by":"crossref","first-page":"614","DOI":"10.1109\/TASL.2008.2012194","volume":"17","author":"B Yegnanarayana","year":"2009","unstructured":"B. Yegnanarayana, K.S.R. Murty, Event-based instantaneous fundamental frequency estimation from speech signals. IEEE Trans. Audio Speech Lang. Process. 17(4), 614\u2013624 (2009)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"9977_CR33","unstructured":"T. Yoshimura, T. Masuko, K. Tokuda, T. Kobayashi, T. Kitamura, Speaker Interpolation in HMM-Based Speech Synthesis System, in Proceedings of Eurospeech, (1997), pp. 2523\u20132526"},{"issue":"1","key":"9977_CR34","doi-asserted-by":"crossref","first-page":"325","DOI":"10.1093\/ietisy\/e90-1.1.325","volume":"E90\u2013D","author":"H Zen","year":"2007","unstructured":"H. Zen, T. Toda, M. Nakamura, K. Tokuda, Details of Nitech HMM-based speech synthesis system for the Blizzard Challenge 2005. IEICE Trans. Inf. Syst. E90\u2013D(1), 325\u2013333 (2007)","journal-title":"IEICE Trans. Inf. Syst."},{"issue":"6","key":"9977_CR35","doi-asserted-by":"crossref","first-page":"1764","DOI":"10.1093\/ietisy\/e91-d.6.1764","volume":"E91\u2013D","author":"H Zen","year":"2008","unstructured":"H. Zen, T. Toda, K. Tokuda, The NITECH-NAIST HMM-based speech synthesis system for the Blizzard Challenge 2006. IEICE Trans. Inf. Syst. E91\u2013D(6), 1764\u20131773 (2008)","journal-title":"IEICE Trans. Inf. Syst."},{"issue":"11","key":"9977_CR36","doi-asserted-by":"crossref","first-page":"1039","DOI":"10.1016\/j.specom.2009.04.004","volume":"51","author":"H Zen","year":"2009","unstructured":"H. Zen, K. Tokuda, A.W. Black, Statistical parametric speech synthesis. Speech Commun. 51(11), 1039\u20131064 (2009)","journal-title":"Speech Commun."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-015-9977-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00034-015-9977-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-015-9977-8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,22]],"date-time":"2019-05-22T15:01:49Z","timestamp":1558537309000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00034-015-9977-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,1,30]]},"references-count":36,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2015,8]]}},"alternative-id":["9977"],"URL":"https:\/\/doi.org\/10.1007\/s00034-015-9977-8","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,1,30]]}}}