{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T19:40:09Z","timestamp":1749843609244,"version":"3.41.0"},"reference-count":36,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,8]]},"DOI":"10.1109\/eusipco.2016.7760468","type":"proceedings-article","created":{"date-parts":[[2016,12,19]],"date-time":"2016-12-19T21:08:29Z","timestamp":1482181709000},"page":"1348-1352","source":"Crossref","is-referenced-by-count":3,"title":["Continuous fundamental frequency prediction with deep neural networks"],"prefix":"10.1109","author":[{"given":"Balint Pal","family":"Toth","sequence":"first","affiliation":[]},{"given":"Tamas Gabor","family":"Csapo","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"Torch 7: A Matlab-like environment for machine learning","author":"collobert","year":"2011","journal-title":"BigLearn NIPS Workshop"},{"key":"ref32","first-page":"261","article-title":"Prec&#x00ED;zi&#x00F3;s p&#x00E1;rhuzamos magyar besz&#x00E9;dadatb&#x00E1;zis fejleszt&#x00E9;se &#x00E9;s szolg&#x00E1;ltat&#x00E1;sai [Development and services of a Hungarian precisely labeled and segmented, parallel speech database] (in Hungarian)","author":"olaszy","year":"2013","journal-title":"Besz&#x00E9;dkutat&#x00E1;s 2013 [Speech Research 2013]"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-49430-8_2"},{"key":"ref30","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","author":"glorot","year":"2010","journal-title":"Proceedings of the International Conference on Artificial Intelligence and Statistics (AISTATS)"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e90-d.5.816"},{"journal-title":"ITU-R Recommendation BS 1534 Method for the subjective assessment of intermediate audio quality","year":"2001","key":"ref35"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1044\/jslhr.4101.73"},{"key":"ref10","first-page":"1964","article-title":"TTS synthesis with bidirectional LSTM based recurrent neural networks","author":"fan","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref11","first-page":"2290","article-title":"Voice source modelling using deep neural networks for statistical parametric speech synthesis","author":"raitio","year":"2014","journal-title":"Proc EUSIPCO"},{"key":"ref12","first-page":"1969","article-title":"Deep neural network based trainable voice source model for synthesis of speech with varying vocal effort","author":"raitio","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref13","first-page":"2242","article-title":"Multiple feed-forward deep neural networks for statistical parametric speech synthesis","author":"takaki","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref14","first-page":"455","article-title":"Multi-space probability distribution HMM","volume":"e85 d","author":"tokuda","year":"2002","journal-title":"IEICE Trans Inf Syst"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2012.2231675"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2076805"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495561"},{"key":"ref18","first-page":"4724","article-title":"Continuous F0 in the source-excitation generation for HMM-based TTS: Do we need voiced\/unvoiced classification?","author":"latorre","year":"2011","journal-title":"Proc ICASSP"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2014.2332186"},{"key":"ref28","first-page":"315","article-title":"Deep sparse rectifier neural networks","volume":"15","author":"glorot","year":"2011","journal-title":"Proceedings of the International Conference on Artificial Intelligence and Statistics (AISTATS)"},{"key":"ref4","first-page":"141","article-title":"F0 generation with a data base of natural F0 patterns and with a neural network","author":"traber","year":"1990","journal-title":"Proc ISCA S"},{"article-title":"ADADELTA: An Adaptive learning rate method","year":"2012","author":"zeiler","key":"ref27"},{"key":"ref3","first-page":"145","article-title":"Parallel networks that learn to pronounce english text","volume":"1","author":"sejnowski","year":"1987","journal-title":"Complex Syst"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.04.004"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639225"},{"key":"ref7","first-page":"7962","article-title":"Statistical parametric speech synthesis using deep neural networks","author":"zen","year":"2013","journal-title":"Proc ICASSP"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"599","DOI":"10.21437\/Eurospeech.1995-152","article-title":"A neural-network-based model of segmental duration for speech synthesis","author":"riedi","year":"1995","journal-title":"Proc EUROSPEECH"},{"key":"ref9","first-page":"1","article-title":"F0 modeling in HMM-based speech synthesis system using Deep Belief Network","author":"mukherjee","year":"0","journal-title":"COCOSDA 2014"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.1.1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853598"},{"key":"ref22","first-page":"294","article-title":"The HMM-based speech synthesis system version 2.0","author":"zen","year":"2007","journal-title":"Proc ISCA SSW6"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-25789-1_4"},{"key":"ref24","first-page":"1043","article-title":"Mel-generalized cepstral analysis - a unified approach to speech spectral estimation","author":"tokuda","year":"1994","journal-title":"Proc ICSLP"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1121\/1.2951592"},{"key":"ref26","first-page":"715","article-title":"Improvements of Hungarian hidden Markov model-based text-to-speech synthesis","volume":"19","author":"t\u00f3th","year":"2010","journal-title":"Acta Cybern"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1002\/ecja.4400660203"}],"event":{"name":"2016 24th European Signal Processing Conference (EUSIPCO)","start":{"date-parts":[[2016,8,29]]},"location":"Budapest, Hungary","end":{"date-parts":[[2016,9,2]]}},"container-title":["2016 24th European Signal Processing Conference (EUSIPCO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7740646\/7760191\/07760468.pdf?arnumber=7760468","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T19:05:17Z","timestamp":1749841517000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7760468\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,8]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/eusipco.2016.7760468","relation":{},"subject":[],"published":{"date-parts":[[2016,8]]}}}