{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T13:10:05Z","timestamp":1746364205818,"version":"3.40.4"},"publisher-location":"Cham","reference-count":94,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319113968"},{"type":"electronic","value":"9783319113975"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-11397-5_3","type":"book-chapter","created":{"date-parts":[[2014,9,2]],"date-time":"2014-09-02T06:41:19Z","timestamp":1409640079000},"page":"37-54","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Phonetics and Machine Learning: Hierarchical Modelling of Prosody in Statistical Speech Synthesis"],"prefix":"10.1007","author":[{"given":"Martti","family":"Vainio","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,9,3]]},"reference":[{"key":"3_CR1","unstructured":"(2014). http:\/\/www.simple4all.org"},{"issue":"2\u20133","key":"3_CR2","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1016\/0167-6393(92)90005-R","volume":"11","author":"P Alku","year":"1992","unstructured":"Alku, P.: Glottal wave analysis with pitch synchronous iterative adaptive inverse filtering. Speech Commun. 11(2\u20133), 109\u2013118 (1992)","journal-title":"Speech Commun."},{"key":"3_CR3","doi-asserted-by":"publisher","first-page":"1329","DOI":"10.1016\/S1388-2457(99)00088-7","volume":"110","author":"P Alku","year":"1999","unstructured":"Alku, P., Tiitinen, H., N\u00e4\u00e4t\u00e4nen, R.: A method for generating natural-sounding speech stimuli for cognitive brain research. Clin. Neurophysiol. 110, 1329\u20131333 (1999)","journal-title":"Clin. Neurophysiol."},{"key":"3_CR4","unstructured":"Altosaar, T., Karjalainen, M.: Multiple-resolution analysis of speech signals. In: Proceedings of IEEE ICASSP-88, New York (1988)"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Anumanchipalli, G.K., Oliveira, L.C., Black, A.W.: A statistical phrase\/accent model for intonation modeling. In: INTERSPEECH, pp. 1813\u20131816 (2011)","DOI":"10.21437\/Interspeech.2011-36"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Arnold, D., Wagner, P., M\u00f6bius, B.: Obtaining prominence judgments from na\u00efve listeners-influence of rating scales, linguistic levels and normalisation. In: Proceedings of Interspeech 2012 (2012)","DOI":"10.21437\/Interspeech.2012-627"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Badino, L., Clark, R.A., Wester, M.: Towards hierarchical prosodic prominence generation in TTS synthesis. In: INTERSPEECH (2012)","DOI":"10.21437\/Interspeech.2012-628"},{"issue":"3","key":"3_CR8","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1111\/tops.12095","volume":"6","author":"L Badino","year":"2014","unstructured":"Badino, L., D\u2019Ausilio, A., Fadiga, L., Metta, G.: Computational validation of the motor contribution to speech perception. Top. Cogn. Sci. 6(3), 461\u2013475 (2014)","journal-title":"Top. Cogn. Sci."},{"issue":"3","key":"3_CR9","doi-asserted-by":"publisher","first-page":"348","DOI":"10.1016\/j.specom.2005.04.008","volume":"46","author":"G Bailly","year":"2005","unstructured":"Bailly, G., Holm, B.: SFC: a trainable prosodic model. Speech Commun. 46(3), 348\u2013364 (2005)","journal-title":"Speech Commun."},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Becker, S., Schr\u00f6der, M., Barry, W.J.: Rule-based prosody prediction for german text-to-speech synthesis. In: Proceedings of Speech Prosody 2006, pp. 503\u2013506 (2006)","DOI":"10.21437\/SpeechProsody.2006-110"},{"key":"3_CR11","unstructured":"Bengio, Y.: Evolving culture vs local minima. arXiv preprint arXiv:1203.2990 (2012)"},{"key":"3_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-39593-2_1","volume-title":"Statistical Language and Speech Processing","author":"Y Bengio","year":"2013","unstructured":"Bengio, Y.: Deep learning of representations: looking forward. In: Dediu, A.-H., Mart\u00edn-Vide, C., Mitkov, R., Truthe, B. (eds.) SLSP 2013. LNCS, vol. 7978, pp. 1\u201337. Springer, Heidelberg (2013)"},{"key":"3_CR13","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1007\/978-3-319-04129-2_34","volume-title":"Recent Advances of Neural Network Models and Applications","author":"\u0160 Be\u0148u\u0161","year":"2014","unstructured":"Be\u0148u\u0161, \u0160.: Conversational entrainment in the use of discourse markers. In: Bassis, S., Esposito, A., Morabito, F.C. (eds.) Recent Advances of Neural Network Models and Applications, pp. 345\u2013352. Springer, Heidelberg (2014)"},{"issue":"4","key":"3_CR14","doi-asserted-by":"publisher","first-page":"e60603","DOI":"10.1371\/journal.pone.0060603","volume":"8","author":"P Birkholz","year":"2013","unstructured":"Birkholz, P.: Modeling consonant-vowel coarticulation for articulatory speech synthesis. PloS One 8(4), e60603 (2013)","journal-title":"PloS One"},{"key":"3_CR15","unstructured":"Birkholz, P., Jackel, D.: A three-dimensional model of the vocal tract for speech synthesis. In: Proceedings of the 15th International Congress of Phonetic Sciences, Barcelona, Spain, pp. 2597\u20132600 (2003)"},{"issue":"2","key":"3_CR16","doi-asserted-by":"crossref","first-page":"282","DOI":"10.17763\/haer.34.2.4474051q78442216","volume":"34","author":"DL Bolinger","year":"1964","unstructured":"Bolinger, D.L.: Around the edge of language: intonation. Harvard Educ. Rev. 34(2), 282\u2013296 (1964)","journal-title":"Harvard Educ. Rev."},{"key":"3_CR17","unstructured":"Campbell, W.N.: CHATR: a high-definition speech re-sequencing system. In: Proceedings of 3rd ASA\/ASJ Joint Meeting, pp. 1223\u20131228 (1996)"},{"issue":"2","key":"3_CR18","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1515\/labphon.2010.022","volume":"1","author":"J Cole","year":"2010","unstructured":"Cole, J., Mo, Y., Hasegawa-Johnson, M.: Signal-based and expectation-based factors in the perception of prosodic prominence. Lab. Phonology 1(2), 425\u2013452 (2010)","journal-title":"Lab. Phonology"},{"key":"3_CR19","unstructured":"Cooper, F.S.: Speech synthesizers. In: Proceedings of 4th International Congress of Phonetic Sciences (ICPhS\u201961), pp. 3\u201313 (1962)"},{"issue":"1644","key":"3_CR20","doi-asserted-by":"publisher","first-page":"20130418","DOI":"10.1098\/rstb.2013.0418","volume":"369","author":"A D\u2019Ausilio","year":"2014","unstructured":"D\u2019Ausilio, A., Maffongelli, L., Bartoli, E., Campanella, M., Ferrari, E., Berry, J., Fadiga, L.: Listening to speech recruits specific tongue motor synergies as revealed by transcranial magnetic stimulation and tissue-doppler ultrasound imaging. Philos. Trans. R. Soc. B: Biol. Sci. 369(1644), 20130418 (2014)","journal-title":"Philos. Trans. R. Soc. B: Biol. Sci."},{"key":"3_CR21","unstructured":"Denes, P.B., Pinson, E.N.: The Speech Chain, p. 121. Bell Laboratory Educational Publication, New York (1963)"},{"key":"3_CR22","doi-asserted-by":"publisher","first-page":"e2","DOI":"10.1017\/atsip.2013.9","volume":"3","author":"L Deng","year":"2014","unstructured":"Deng, L.: A tutorial survey of architectures, algorithms, and applications for deep learning. APSIPA Trans. Signal Inf. Process. 3, e2 (2014)","journal-title":"APSIPA Trans. Signal Inf. Process."},{"issue":"5","key":"3_CR23","doi-asserted-by":"publisher","first-page":"1060","DOI":"10.1109\/TASL.2013.2244083","volume":"21","author":"L Deng","year":"2013","unstructured":"Deng, L., Li, X.: Machine learning paradigms for speech recognition: an overview. IEEE Trans. Audio, Speech Lang. Process. 21(5), 1060\u20131089 (2013)","journal-title":"IEEE Trans. Audio, Speech Lang. Process."},{"key":"3_CR24","doi-asserted-by":"crossref","DOI":"10.1007\/978-94-011-5730-8","volume-title":"An Introduction to Text-to-Speech Synthesis","author":"T Dutoit","year":"1997","unstructured":"Dutoit, T.: An Introduction to Text-to-Speech Synthesis, vol. 3. Springer, New York (1997)"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Eriksson, A., Thunberg, G.C., Traunm\u00fcller, H.: Syllable prominence: a matter of vocal effort, phonetic distinctness and top-down processing. In: Proceedings of European Conference on Speech Communication and Technology Aalborg, vol. 1, pp. 399\u2013402, September 2001","DOI":"10.21437\/Eurospeech.2001-64"},{"key":"3_CR26","unstructured":"Fant, C.G.M., Martony, J., Rengman, U., Risberg, A.: OVE II synthesis strategy. In: Proceedings of the Speech Communication Seminar F, vol. 5 (1962)"},{"key":"3_CR27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-02732-6","volume-title":"Application of Wavelets in Speech Processing","author":"MH Farouk","year":"2014","unstructured":"Farouk, M.H.: Application of Wavelets in Speech Processing. Springer, New York (2014)"},{"key":"3_CR28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-01562-9","volume-title":"Speech Analysis, Synthesis and Perception","author":"JL Flanagan","year":"1972","unstructured":"Flanagan, J.L.: Speech Analysis, Synthesis and Perception, vol. 1, 2nd edn. Springer, Heidelberg (1972)","edition":"2"},{"issue":"2","key":"3_CR29","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1121\/1.1908864","volume":"29","author":"JL Flanagan","year":"1957","unstructured":"Flanagan, J.L.: Note on the design of \u201cterminal-analog\u201d speech synthesizers. J. Acoust. Soc. Am. 29(2), 306\u2013310 (1957)","journal-title":"J. Acoust. Soc. Am."},{"key":"3_CR30","doi-asserted-by":"publisher","first-page":"4522","DOI":"10.1098\/rspb.2012.1741","volume":"279","author":"SL Frank","year":"2012","unstructured":"Frank, S.L., Bod, R., Christiansen, M.H.: How hierarchical is language use? Proc. R. Soc. B: Biol. Sci. 279, 4522\u20134531 (2012)","journal-title":"Proc. R. Soc. B: Biol. Sci."},{"issue":"4","key":"3_CR31","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1250\/ast.5.233","volume":"5","author":"H Fujisaki","year":"1984","unstructured":"Fujisaki, H., Hirose, K.: Analysis of voice fundamental frequency contours for declarative sentences of Japanese. J. Acoust. Soc. Jpn. (E) 5(4), 233\u2013241 (1984)","journal-title":"J. Acoust. Soc. Jpn. (E)"},{"key":"3_CR32","first-page":"75","volume":"30","author":"H Fujisaki","year":"1971","unstructured":"Fujisaki, H., Sudo, H.: A generative model for the prosody of connected speech in japanese. Annu. Rep. Eng. Res. Inst. 30, 75\u201380 (1971)","journal-title":"Annu. Rep. Eng. Res. Inst."},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Fukui, K., Ishikawa, Y., Sawa, T., Shintaku, E., Honda, M., Takanishi, A.: New anthropomorphic talking robot having a three-dimensional articulation mechanism and improved pitch range. In: 2007 IEEE International Conference on Robotics and Automation pp. 2922\u20132927. IEEE (2007)","DOI":"10.1109\/ROBOT.2007.363915"},{"key":"3_CR34","volume-title":"Autosegmental and Metrical Phonology","author":"JA Goldsmith","year":"1990","unstructured":"Goldsmith, J.A.: Autosegmental and Metrical Phonology, vol. 11. Blackwell, Oxford (1990)"},{"key":"3_CR35","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1142\/9789814415125_0004","volume":"11","author":"A Grossman","year":"1985","unstructured":"Grossman, A., Morlet, J.: Decomposition of functions into wavelets of constant shape, and related transforms. Math. Phys. Lect. Recent Results 11, 135\u2013165 (1985)","journal-title":"Math. Phys. Lect. Recent Results"},{"issue":"1","key":"3_CR36","first-page":"83","volume":"1","author":"M Halle","year":"1980","unstructured":"Halle, M., Vergnaud, J.R.: Three dimensional phonology. J. Linguist. Res. 1(1), 83\u2013105 (1980)","journal-title":"J. Linguist. Res."},{"key":"3_CR37","volume-title":"Metrical Structures in Phonology","author":"M Halle","year":"1978","unstructured":"Halle, M., Vergnaud, J.R., et al.: Metrical Structures in Phonology. MIT, Cambridge (1978)"},{"issue":"1","key":"3_CR38","doi-asserted-by":"publisher","first-page":"EL1","DOI":"10.1121\/1.2744126","volume":"122","author":"A Hannukainen","year":"2007","unstructured":"Hannukainen, A., Lukkari, T., Malinen, J., Palo, P.: Vowel formants from the wave equation. J. Acoust. Soc. Am. 122(1), EL1\u2013EL7 (2007)","journal-title":"J. Acoust. Soc. Am."},{"issue":"4","key":"3_CR39","doi-asserted-by":"publisher","first-page":"1155","DOI":"10.1121\/1.388325","volume":"72","author":"SR Hertz","year":"1982","unstructured":"Hertz, S.R.: From text to speech with SRS. J. Acoust. Soc. Am. 72(4), 1155\u20131170 (1982)","journal-title":"J. Acoust. Soc. Am."},{"issue":"11","key":"3_CR40","doi-asserted-by":"publisher","first-page":"1589","DOI":"10.1109\/PROC.1985.13341","volume":"73","author":"SR Hertz","year":"1985","unstructured":"Hertz, S.R., Kadin, J., Karplus, K.J.: The delta rule development system for speech synthesis from text. Proc. IEEE 73(11), 1589\u20131601 (1985)","journal-title":"Proc. IEEE"},{"issue":"1\u20132","key":"3_CR41","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1016\/0004-3702(93)90020-C","volume":"63","author":"J Hirschberg","year":"1993","unstructured":"Hirschberg, J.: Pitch accent in context: predicting intonational prominence from text. Artif. Intell. 63(1\u20132), 305\u2013340 (1993)","journal-title":"Artif. Intell."},{"key":"3_CR42","doi-asserted-by":"crossref","unstructured":"Hunt, A.J., Black, A.W.: Unit selection in a concatenative speech synthesis system using a large speech database. In: Proceedings of the 1996 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP-96, vol. 1, pp. 373\u2013376. IEEE (1996)","DOI":"10.1109\/ICASSP.1996.541110"},{"key":"3_CR43","doi-asserted-by":"crossref","unstructured":"King, S.: Measuring a decade of progress in text-to-speech. Loguens 1(1) (2014)","DOI":"10.3989\/loquens.2014.006"},{"issue":"3","key":"3_CR44","doi-asserted-by":"publisher","first-page":"737","DOI":"10.1121\/1.395275","volume":"82","author":"DH Klatt","year":"1987","unstructured":"Klatt, D.H.: Review of text-to-speech conversion for english. J. Acoust. Soc. Am. 82(3), 737\u2013793 (1987)","journal-title":"J. Acoust. Soc. Am."},{"key":"3_CR45","unstructured":"Klatt, D.: Acoustic theory of terminal analog speech synthesis. In: Proceedings of 1972 International Conference on Speech Communication Processing, Boston, MA (1972)"},{"key":"3_CR46","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1007\/978-3-540-49127-9_14","volume-title":"Springer Handbook of Speech Processing","author":"WB Kleijn","year":"2008","unstructured":"Kleijn, W.B.: Principles of speech coding. In: Benesty, J., Sondhi, M.M., Huang, Y. (eds.) Springer Handbook of Speech Processing, pp. 283\u2013306. Springer, Heidelberg (2008)"},{"key":"3_CR47","doi-asserted-by":"crossref","unstructured":"Kochanski, G., Shih, C.: Stem-ml: language-independent prosody description. In: INTERSPEECH, pp. 239\u2013242 (2000)","DOI":"10.21437\/ICSLP.2000-522"},{"issue":"3","key":"3_CR48","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1016\/S0167-6393(02)00047-X","volume":"39","author":"G Kochanski","year":"2003","unstructured":"Kochanski, G., Shih, C.: Prosody modeling with soft templates. Speech Commun. 39(3), 311\u2013352 (2003)","journal-title":"Speech Commun."},{"key":"3_CR49","doi-asserted-by":"crossref","unstructured":"Kruschke, H., Lenz, M.: Estimation of the parameters of the quantitative intonation model with continuous wavelet analysis. In: INTERSPEECH (2003)","DOI":"10.21437\/Eurospeech.2003-45"},{"key":"3_CR50","doi-asserted-by":"crossref","unstructured":"Lei, M., Wu, Y.J., Soong, F.K., Ling, Z.H., Dai, L.R.: A hierarchical f0 modeling method for HMM-based speech synthesis. In: INTERSPEECH, pp. 2170\u20132173 (2010)","DOI":"10.21437\/Interspeech.2010-598"},{"issue":"6","key":"3_CR51","doi-asserted-by":"publisher","first-page":"431","DOI":"10.1037\/h0020279","volume":"74","author":"AM Liberman","year":"1967","unstructured":"Liberman, A.M., Cooper, F.S., Shankweiler, D.P., Studdert-Kennedy, M.: Perception of the speech code. Psychol. Rev. 74(6), 431 (1967)","journal-title":"Psychol. Rev."},{"issue":"1","key":"3_CR52","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0010-0277(85)90021-6","volume":"21","author":"AM Liberman","year":"1985","unstructured":"Liberman, A.M., Mattingly, I.G.: The motor theory of speech perception revised. Cognition 21(1), 1\u201336 (1985)","journal-title":"Cognition"},{"issue":"1","key":"3_CR53","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1109\/TASL.2012.2215600","volume":"21","author":"ZH Ling","year":"2013","unstructured":"Ling, Z.H., Richmond, K., Yamagishi, J.: Articulatory control of HMM-based parametric speech synthesis using feature-space-switched multiple regression. IEEE Trans. Audio Speech Lang. Process. 21(1), 207\u2013219 (2013)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"3_CR54","doi-asserted-by":"crossref","unstructured":"Mallat, S.: A wavelet tour of signal processing. Access Online via Elsevier (1999)","DOI":"10.1016\/B978-012466606-1\/50008-8"},{"key":"3_CR55","doi-asserted-by":"crossref","unstructured":"Mishra, T., Santen, J.V., Klabbers, E.: Decomposition of pitch curves in the general superpositional intonation model. In: Speech Prosody, Dresden, Germany (2006)","DOI":"10.21437\/SpeechProsody.2006-116"},{"issue":"1","key":"3_CR56","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1075\/hl.34.1.03bat","volume":"34","author":"EB Moro","year":"2007","unstructured":"Moro, E.B.: A 19th-century speaking machine: the tecnef\u00f3n of severino perez y vazquez. Historiographia Linguistica 34(1), 19\u201336 (2007)","journal-title":"Historiographia Linguistica"},{"key":"3_CR57","doi-asserted-by":"crossref","unstructured":"Nishikawa, K., Asama, K., Hayashi, K., Takanobu, H., Takanishi, A.: Development of a talking robot. In: Proceedings of 2000 IEEE\/RSJ International Conference on Intelligent Robots and Systems 2000 (IROS 2000), vol. 3, pp. 1760\u20131765. IEEE (2000)","DOI":"10.1109\/IROS.2000.895226"},{"key":"3_CR58","unstructured":"\u00d6hman, S.: Word and sentence intonation: a quantitative model. Speech Transmission Laboratory, Department of Speech Communication, Royal Institute of Technology (1967)"},{"issue":"5853","key":"3_CR59","doi-asserted-by":"publisher","first-page":"1088","DOI":"10.1126\/science.1145803","volume":"318","author":"R Pfeifer","year":"2007","unstructured":"Pfeifer, R., Lungarella, M., Iida, F.: Self-organization, embodiment, and biologically inspired robotics. Science 318(5853), 1088\u20131093 (2007)","journal-title":"Science"},{"key":"3_CR60","unstructured":"Raitio, T., Lu, H., Kane, J., Suni, A., Vainio, M., King, S., Alku, P.: Voice source modelling using deep neural networks for statistical parametric speech synthesis. In: 22nd European Signal Processing Conference (EUSIPCO), Lisbon, Portugal, September 2014 (accepted)"},{"key":"3_CR61","doi-asserted-by":"crossref","unstructured":"Raitio, T., Suni, A., Juvela, L., Vainio, M., Alku, P.: Deep neural network based trainable voice source model for synthesis of speech with varying vocal effort. In: Proceedings of Interspeech, Singapore, accepted: September 2014","DOI":"10.21437\/Interspeech.2014-444"},{"key":"3_CR62","doi-asserted-by":"crossref","unstructured":"Raitio, T., Suni, A., Pohjalainen, J., Airaksinen, M., Vainio, M., Alku, P.: Analysis and synthesis of shouted speech. In: Interspeech, Lyon, France, pp. 1544\u20131548, August 2013","DOI":"10.21437\/Interspeech.2013-391"},{"key":"3_CR63","doi-asserted-by":"crossref","unstructured":"Raitio, T., Suni, A., Vainio, M., Alku, P.: Analysis of HMM-based lombard speech synthesis. In: Interspeech, Florence, Italy, pp. 2781\u20132784, August 2011","DOI":"10.21437\/Interspeech.2011-696"},{"issue":"2","key":"3_CR64","doi-asserted-by":"publisher","first-page":"648","DOI":"10.1016\/j.csl.2013.03.003","volume":"28","author":"T Raitio","year":"2014","unstructured":"Raitio, T., Suni, A., Vainio, M., Alku, P.: Synthesis and perception of breathy, normal, and lombard speech in the presence of noise. Comput. Speech Lang. 28(2), 648\u2013664 (2014)","journal-title":"Comput. Speech Lang."},{"key":"3_CR65","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-2281-2","volume-title":"Modern Methods of Speech Processing","author":"R Ramachandran","year":"1995","unstructured":"Ramachandran, R., Mammone, R.: Modern Methods of Speech Processing. Springer, New York (1995)"},{"key":"3_CR66","volume-title":"Speech Time-Frequency Representation","author":"MD Riley","year":"1989","unstructured":"Riley, M.D.: Speech Time-Frequency Representation, vol. 63. Springer, New York (1989)"},{"issue":"6","key":"3_CR67","doi-asserted-by":"publisher","first-page":"2985","DOI":"10.1121\/1.401772","volume":"90","author":"JC van Rooij","year":"1991","unstructured":"van Rooij, J.C., Plomp, R.: The effect of linguistic entropy on speech perception in noise in young and elderly listeners. J. Acoust. Soc. Am. 90(6), 2985\u20132991 (1991)","journal-title":"J. Acoust. Soc. Am."},{"key":"3_CR68","unstructured":"van Santen, J.P., Mishra, T., Klabbers, E.: Estimating phrase curves in the general superpositional intonation model. In: Fifth ISCA Workshop on Speech Synthesis (2004)"},{"issue":"1","key":"3_CR69","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1016\/0167-6393(93)90074-U","volume":"13","author":"MR Schroeder","year":"1993","unstructured":"Schroeder, M.R.: A brief history of synthetic speech. Speech Commun. 13(1), 231\u2013237 (1993)","journal-title":"Speech Commun."},{"issue":"4","key":"3_CR70","doi-asserted-by":"publisher","first-page":"1229","DOI":"10.1037\/a0020490","volume":"117","author":"J Simko","year":"2010","unstructured":"Simko, J., Cummins, F.: Embodied task dynamics. Psychol. Rev. 117(4), 1229 (2010)","journal-title":"Psychol. Rev."},{"key":"3_CR71","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.wocn.2013.11.006","volume":"44","author":"J \u0160imko","year":"2014","unstructured":"\u0160imko, J., O\u2019Dell, M., Vainio, M.: Emergent consonantal quantity contrast and context-dependence of gestural phasing. J. Phonetics 44, 130\u2013151 (2014)","journal-title":"J. Phonetics"},{"issue":"7","key":"3_CR72","doi-asserted-by":"publisher","first-page":"955","DOI":"10.1109\/TASSP.1987.1165240","volume":"35","author":"MM Sondhi","year":"1987","unstructured":"Sondhi, M.M., Schroeter, J.: A hybrid time-frequency domain articulatory speech synthesizer. IEEE Trans. Acoust. Speech Signal Process. 35(7), 955\u2013967 (1987)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"3_CR73","volume-title":"Multilingual Text-to-Speech Synthesis","author":"RW Sproat","year":"1997","unstructured":"Sproat, R.W.: Multilingual Text-to-Speech Synthesis. Kluwer Academic Publishers, Boston (1997)"},{"issue":"5","key":"3_CR74","doi-asserted-by":"publisher","first-page":"3231","DOI":"10.1121\/1.1869752","volume":"117","author":"BH Story","year":"2005","unstructured":"Story, B.H.: A parametric model of the vocal tract area function for vowel and consonant simulation. J. Acoust. Soc. Am. 117(5), 3231\u20133254 (2005)","journal-title":"J. Acoust. Soc. Am."},{"key":"3_CR75","unstructured":"Suni, A., Aalto, D., Raitio, T., Alku, P., Vainio, M.: Wavelets for intonation modeling in HMM speech synthesis. In: 8th ISCA Speech Synthesis Workshop (SSW8), Barcelona, Spain, pp. 285\u2013290, August-September 2013"},{"key":"3_CR76","doi-asserted-by":"crossref","unstructured":"Suni, A., Raitio, T., Vainio, M., Alku, P.: The GlottHMM speech synthesis entry for Blizzard Challenge 2010. In: Blizzard Challenge 2010 Workshop, Kyoto, Japan, September 2010","DOI":"10.21437\/Blizzard.2010-11"},{"key":"3_CR77","doi-asserted-by":"crossref","unstructured":"Suni, A., Raitio, T., Vainio, M., Alku, P.: The GlottHMM entry for Blizzard Challenge 2011: utilizing source unit selection in HMM-based speech synthesis for improved excitation generation. In: Blizzard Challenge 2011 Workshop, Florence, Italy, September 2011","DOI":"10.21437\/Blizzard.2011-7"},{"key":"3_CR78","doi-asserted-by":"crossref","unstructured":"Suni, A., Raitio, T., Vainio, M., Alku, P.: The GlottHMM entry for Blizzard Challenge 2012 - hybrid approach. In: Blizzard Challenge 2012 Workshop, Portland, Oregon, September 2012","DOI":"10.21437\/Blizzard.2012-8"},{"key":"3_CR79","unstructured":"Suni, A., Simko, J., Aalto, D., Vainio, M.: Continuous wavelet transform in text-to-speech synthesis prosody control (in preparation)"},{"key":"3_CR80","unstructured":"Suni, A.S., Aalto, D., Raitio, T., Alku, P., Vainio, M., et al.: Wavelets for intonation modeling in HMM speech synthesis. In: Proceedings of 8th ISCA Workshop on Speech Synthesis, Barcelona, 31 August-2 September 2013"},{"key":"3_CR81","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511816338","volume-title":"Text-to-Speech Synthesis","author":"P Taylor","year":"2009","unstructured":"Taylor, P.: Text-to-Speech Synthesis. Cambridge University Press, Cambridge (2009)"},{"key":"3_CR82","doi-asserted-by":"crossref","unstructured":"Tokuda, K., Kobayashi, T., Imai, S.: Speech parameter generation from HMM using dynamic features. In: 1995 International Conference on Acoustics, Speech, and Signal Processing, ICASSP-95, vol. 1, pp. 660\u2013663. IEEE (1995)","DOI":"10.1109\/ICASSP.1995.479684"},{"key":"3_CR83","doi-asserted-by":"crossref","unstructured":"Tokuda, K., Yoshimura, T., Masuko, T., Kobayashi, T., Kitamura, T.: Speech parameter generation algorithms for HMM-based speech synthesis. In: Proceedings of 2001 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP\u201900, vol. 3, pp. 1315\u20131318. IEEE (2000)","DOI":"10.1109\/ICASSP.2000.861820"},{"issue":"7","key":"3_CR84","doi-asserted-by":"publisher","first-page":"2359","DOI":"10.1007\/s00221-014-3932-y","volume":"232","author":"L Vainio","year":"2014","unstructured":"Vainio, L., Tiainen, M., Tiippana, K., Vainio, M.: Shared processing of planning articulatory gestures and grasping. Exp. Brain Res. 232(7), 2359\u20132368 (2014)","journal-title":"Exp. Brain Res."},{"issue":"1","key":"3_CR85","doi-asserted-by":"publisher","first-page":"e53061","DOI":"10.1371\/journal.pone.0053061","volume":"8","author":"L Vainio","year":"2013","unstructured":"Vainio, L., Schulman, M., Tiippana, K., Vainio, M.: Effect of syllable articulation on precision and power grip performance. PloS One 8(1), e53061 (2013)","journal-title":"PloS One"},{"key":"3_CR86","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/j.wocn.2005.06.004","volume":"34","author":"M Vainio","year":"2006","unstructured":"Vainio, M., J\u00e4rvikivi, J.: Tonal features, intensity, and word order in the perception of prominence. J. Phonetics 34, 319\u2013342 (2006)","journal-title":"J. Phonetics"},{"key":"3_CR87","unstructured":"Vainio, M., Suni, A., Aalto, D.: Continuous wavelet transform for analysis of speech prosody. In: Proceedings of TRASP 2013-Tools and Resources for the Analysis of Speech Prosody, An Interspeech 2013 Satellite Event, August 30 2013, Laboratoire Parole et Language, Aix-en-Provence, France (2013)"},{"key":"3_CR88","doi-asserted-by":"crossref","unstructured":"Vainio, M., Suni, A., Aalto, D.: Emphasis, word prominence, and continuous wavelet transform in the control of HMM based synthesis. In: Speech Prosody in Speech Synthesis - Modeling, Realizing, Converting Prosody for High Quality and Flexible Speech Synthesis, Prosody, Phonology and Phonetics. Springer (2015)","DOI":"10.1007\/978-3-662-45258-5_12"},{"key":"3_CR89","doi-asserted-by":"crossref","unstructured":"Vainio, M., Suni, A., Raitio, T., Nurminen, J., J\u00e4rvikivi, J., Alku, P.: New method for delexicalization and its application to prosodic tagging for text-to-speech synthesis. In: Interspeech, Brighton, UK, pp. 1703\u20131706, September 2009","DOI":"10.21437\/Interspeech.2009-514"},{"key":"3_CR90","unstructured":"Vainio, M., Suni, A., Sirjola, P.: Developing a finnish concept-to-speech system. In: Langemets, M., Penjam, P. (eds.) Proceedings of the Second Baltic Conference on Human Language Technologies, Tallinn, pp. 201\u2013206, 4\u20135 April 2005"},{"key":"3_CR91","unstructured":"von Kempelen, W., de P\u00e1zm\u00e1nd, W.K., Autriche, M.: Mechanismus der menschlichen Sprache nebst der Beschreibung seiner sprechenden Maschine. bei JV Degen (1791)"},{"key":"3_CR92","unstructured":"Watts, O.S.: Unsupervised learning for text-to-speech synthesis. Ph.D. thesis (2013)"},{"key":"3_CR93","doi-asserted-by":"crossref","unstructured":"Zen, H., Braunschweiler, N.: Context-dependent additive log f_0 model for HMM-based speech synthesis. In: INTERSPEECH, pp. 2091\u20132094 (2009)","DOI":"10.21437\/Interspeech.2009-599"},{"issue":"11","key":"3_CR94","doi-asserted-by":"publisher","first-page":"1039","DOI":"10.1016\/j.specom.2009.04.004","volume":"51","author":"H Zen","year":"2009","unstructured":"Zen, H., Tokuda, K., Black, A.W.: Statistical parametric speech synthesis. Speech Commun. 51(11), 1039\u20131064 (2009)","journal-title":"Speech Commun."}],"container-title":["Lecture Notes in Computer Science","Statistical Language and Speech Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-11397-5_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,4]],"date-time":"2025-05-04T12:32:05Z","timestamp":1746361925000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-11397-5_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319113968","9783319113975"],"references-count":94,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-11397-5_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"3 September 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}