{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T22:27:18Z","timestamp":1743028038425,"version":"3.40.3"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030410049"},{"type":"electronic","value":"9783030410056"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-41005-6_24","type":"book-chapter","created":{"date-parts":[[2020,2,12]],"date-time":"2020-02-12T21:03:52Z","timestamp":1581541432000},"page":"355-368","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["An Experimental Study on Fundamental Frequency Detection in Reverberated Speech with Pre-trained Recurrent Neural Networks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7921-1841","authenticated-orcid":false,"given":"Andrei","family":"Alfaro-Picado","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5794-5273","authenticated-orcid":false,"given":"Stacy","family":"Sol\u00eds-Cerdas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6833-9938","authenticated-orcid":false,"given":"Marvin","family":"Coto-Jim\u00e9nez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,2,12]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid, O., Mohamed, A.R., Jiang, H., Penn, G.: Applying convolutional neural networks concepts to hybrid NN-HMM model for speech recognition. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4277\u20134280. IEEE (2012)","DOI":"10.1109\/ICASSP.2012.6288864"},{"key":"24_CR2","unstructured":"Baek, J., Cho, S.: Bankruptcy prediction for credit risk using an auto-associative neural network in Korean firms. In: 2003 Proceedings of the IEEE International Conference on Computational Intelligence for Financial Engineering, 2003. pp. 25\u201329. IEEE (2003)"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Bagchi, D., Mandel, M.I., Wang, Z., He, Y., Plummer, A., Fosler-Lussier, E.: Combining spectral feature mapping and multi-channel model-based source separation for noise-robust automatic speech recognition. In: 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 496\u2013503. IEEE (2015)","DOI":"10.1109\/ASRU.2015.7404836"},{"key":"24_CR4","unstructured":"Bengio, Y., Frasconi, P., Simard, P.: The problem of learning long-term dependencies in recurrent networks. In: IEEE International Conference on Neural Networks, pp. 1183\u20131188. IEEE (1993)"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Coto-Jim\u00e9nez, M.: Pre-training long short-term memory neural networks for efficient regression in artificial speech postfiltering. In: 2018 IEEE International Work Conference on Bioinspired Intelligence (IWOBI), pp. 1\u20137. IEEE (2018)","DOI":"10.1109\/IWOBI.2018.8464204"},{"issue":"2","key":"24_CR6","doi-asserted-by":"publisher","first-page":"39","DOI":"10.3390\/biomimetics4020039","volume":"4","author":"M Coto-Jim\u00e9nez","year":"2019","unstructured":"Coto-Jim\u00e9nez, M.: Improving post-filtering of artificial speech using pre-trained lstm neural networks. Biomimetics 4(2), 39 (2019)","journal-title":"Biomimetics"},{"issue":"01","key":"24_CR7","doi-asserted-by":"publisher","first-page":"1860008","DOI":"10.1142\/S021800141860008X","volume":"32","author":"M Coto-Jim\u00e9nez","year":"2018","unstructured":"Coto-Jim\u00e9nez, M., Goddard-Close, J.: LSTM deep neural networks postfiltering for enhancing synthetic voices. Int. J. Pattern Recogn. Artif. Intell. 32(01), 1860008 (2018)","journal-title":"Int. J. Pattern Recogn. Artif. Intell."},{"key":"24_CR8","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1007\/978-3-319-43958-7_42","volume-title":"Speech and Computer","author":"M Coto-Jim\u00e9nez","year":"2016","unstructured":"Coto-Jim\u00e9nez, M., Goddard-Close, J., Mart\u00ednez-Licona, F.: Improving automatic speech recognition containing additive noise using deep denoising autoencoders of LSTM networks. In: Ronzhin, A., Potapova, R., N\u00e9meth, G. (eds.) SPECOM 2016. LNCS (LNAI), vol. 9811, pp. 354\u2013361. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-43958-7_42"},{"issue":"1","key":"24_CR9","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"GE Dahl","year":"2011","unstructured":"Dahl, G.E., Yu, D., Deng, L., Acero, A.: Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition. IEEE Trans. Audio Speech Lang. Process. 20(1), 30\u201342 (2011)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Du, J., Wang, Q., Gao, T., Xu, Y., Dai, L.R., Lee, C.H.: Robust speech recognition with speech enhanced deep neural networks. In: Fifteenth Annual Conference of the International Speech Communication Association (2014)","DOI":"10.21437\/Interspeech.2014-148"},{"issue":"Feb","key":"24_CR11","first-page":"625","volume":"11","author":"D Erhan","year":"2010","unstructured":"Erhan, D., Bengio, Y., Courville, A., Manzagol, P.A., Vincent, P., Bengio, S.: Why does unsupervised pre-training help deep learning? J. Mach. Learn. Res. 11(Feb), 625\u2013660 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Erro, D., Sainz, I., Navas, E., Hern\u00e1ez, I.: Improved HNM-based vocoder for statistical synthesizers. In: Twelfth Annual Conference of the International Speech Communication Association (2011)","DOI":"10.21437\/Interspeech.2011-35"},{"key":"24_CR13","unstructured":"Erro, D., Sainz, I., Saratxaga, I., Navas, E., Hern\u00e1ez, I.: MFCC+ F0 extraction and waveform reconstruction using HNM: preliminary results in an hmm-based synthesizer. In: Proceeding of the FALA, pp. 29\u201332 (2010)"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Fan, Y., Qian, Y., Xie, F.L., Soong, F.K.: TTS synthesis with bidirectional LSTM based recurrent neural networks. In: Fifteenth Annual Conference of the International Speech Communication Association (2014)","DOI":"10.21437\/Interspeech.2014-443"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Feng, X., Zhang, Y., Glass, J.: Speech feature denoising and dereverberation via deep autoencoders for noisy reverberant speech recognition. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1759\u20131763. IEEE (2014)","DOI":"10.1109\/ICASSP.2014.6853900"},{"issue":"Aug","key":"24_CR16","first-page":"115","volume":"3","author":"FA Gers","year":"2002","unstructured":"Gers, F.A., Schraudolph, N.N., Schmidhuber, J.: Learning precise timing with LSTM recurrent networks. J. Mach. Learn. Res. 3(Aug), 115\u2013143 (2002)","journal-title":"J. Mach. Learn. Res."},{"key":"24_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"799","DOI":"10.1007\/11550907_126","volume-title":"Artificial Neural Networks: Formal Models and Their Applications \u2013 ICANN 2005","author":"A Graves","year":"2005","unstructured":"Graves, A., Fern\u00e1ndez, S., Schmidhuber, J.: Bidirectional LSTM networks for improved phoneme classification and recognition. In: Duch, W., Kacprzyk, J., Oja, E., Zadro\u017cny, S. (eds.) ICANN 2005. LNCS, vol. 3697, pp. 799\u2013804. Springer, Heidelberg (2005). https:\/\/doi.org\/10.1007\/11550907_126"},{"key":"24_CR18","doi-asserted-by":"crossref","unstructured":"Graves, A., Jaitly, N., Mohamed, A.R.: Hybrid speech recognition with deep bidirectional LSTM. In: 2013 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), pp. 273\u2013278. IEEE (2013)","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Han, K., He, Y., Bagchi, D., Fosler-Lussier, E., Wang, D.: Deep neural network based spectral feature mapping for robust speech recognition. In: Sixteenth Annual Conference of the International Speech Communication Association (2015)","DOI":"10.21437\/Interspeech.2015-536"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Hansen, J.H., Pellom, B.L.: An effective quality evaluation protocol for speech enhancement algorithms. In: Fifth International Conference on Spoken Language Processing (1998)","DOI":"10.21437\/ICSLP.1998-350"},{"issue":"6","key":"24_CR21","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"G Hinton","year":"2012","unstructured":"Hinton, G., et al.: Deep neural networks for acoustic modeling in speech recognition: the shared views of four research groups. IEEE Signal Process. Mag. 29(6), 82\u201397 (2012)","journal-title":"IEEE Signal Process. Mag."},{"key":"24_CR22","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"issue":"8","key":"24_CR23","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"24_CR24","doi-asserted-by":"crossref","unstructured":"Huang, J., Kingsbury, B.: Audio-visual deep learning for noise robust speech recognition. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7596\u20137599. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6639140"},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"Ishii, T., Komiyama, H., Shinozaki, T., Horiuchi, Y., Kuroiwa, S.: Reverberant speech recognition based on denoising autoencoder. In: Interspeech, pp. 3512\u20133516 (2013)","DOI":"10.21437\/Interspeech.2013-267"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Kumar, A., Florencio, D.: Speech enhancement in multiple-noise conditions using deep neural networks. arXiv preprint arXiv:1605.02427 (2016)","DOI":"10.21437\/Interspeech.2016-88"},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Li, J., Zhao, R., Huang, J.T., Gong, Y.: Learning small-size DNN with output-distribution-based criteria. In: Fifteenth Annual Conference of the International Speech Communication Association (2014)","DOI":"10.21437\/Interspeech.2014-432"},{"key":"24_CR28","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.specom.2017.11.003","volume":"96","author":"K Li","year":"2018","unstructured":"Li, K., Mao, S., Li, X., Wu, Z., Meng, H.: Automatic lexical stress and pitch accent detection for L2 English speech using multi-distribution deep neural networks. Speech Commun. 96, 28\u201336 (2018)","journal-title":"Speech Commun."},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Liu, B., Tao, J., Zhang, D., Zheng, Y.: A novel pitch extraction based on jointly trained deep BLSTM recurrent neural networks with bottleneck features. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 336\u2013340. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952173"},{"key":"24_CR30","doi-asserted-by":"crossref","unstructured":"Narayanan, A., Wang, D.: Ideal ratio mask estimation using deep neural networks for robust speech recognition. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7092\u20137096. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6639038"},{"key":"24_CR31","doi-asserted-by":"publisher","unstructured":"Naylor, P.A., Gaubitch, N.D.: Speech Dereverberation. Springer, London (2010). https:\/\/doi.org\/10.1007\/978-1-84996-056-4","DOI":"10.1007\/978-1-84996-056-4"},{"key":"24_CR32","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: On the difficulty of training recurrent neural networks. In: International Conference on Machine Learning, pp. 1310\u20131318 (2013)"},{"key":"24_CR33","unstructured":"Ribas, D., Llombart, J., Miguel, A., Vicente, L.: Deep speech enhancement for reverberated and noisy signals using wide residual networks. arXiv preprint arXiv:1901.00660 (2019)"},{"key":"24_CR34","doi-asserted-by":"crossref","unstructured":"Seltzer, M.L., Yu, D., Wang, Y.: An investigation of deep neural networks for noise robust speech recognition. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7398\u20137402. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6639100"},{"issue":"2","key":"24_CR35","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1109\/TASLP.2017.2779405","volume":"26","author":"J Stahl","year":"2018","unstructured":"Stahl, J., Mowlaee, P.: A pitch-synchronous simultaneous detection-estimation framework for speech enhancement. IEEE\/ACM Trans. Audio Speech Langu. Process. (TASLP) 26(2), 436\u2013450 (2018)","journal-title":"IEEE\/ACM Trans. Audio Speech Langu. Process. (TASLP)"},{"key":"24_CR36","doi-asserted-by":"crossref","unstructured":"Tang, Z., Wang, D., Zhang, Z.: Recurrent neural network training with dark knowledge transfer. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5900\u20135904. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472809"},{"key":"24_CR37","doi-asserted-by":"publisher","unstructured":"Valentini-Botinhao, C.: Noisy reverberant speech database for training speech enhancement algorithms and TTS models, 2016 [dataset] (2017). https:\/\/doi.org\/10.7488\/ds\/2139","DOI":"10.7488\/ds\/2139"},{"key":"24_CR38","unstructured":"Van Den Oord, A., Dieleman, S., Schrauwen, B.: Transfer learning by supervised pre-training for audio-based music classification. In: Conference of the International Society for Music Information Retrieval (ISMIR 2014) (2014)"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Vesel\u1ef3, K., Hannemann, M., Burget, L.: Semi-supervised training of deep neural networks. In: 2013 IEEE Workshop on Automatic Speech Recognition and Understanding, pp. 267\u2013272. IEEE (2013)","DOI":"10.1109\/ASRU.2013.6707741"},{"issue":"4","key":"24_CR40","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1016\/j.csl.2014.01.001","volume":"28","author":"F Weninger","year":"2014","unstructured":"Weninger, F., Geiger, J., W\u00f6llmer, M., Schuller, B., Rigoll, G.: Feature enhancement by deep LSTM networks for ASR in reverberant multisource environments. Comput. Speech Lang. 28(4), 888\u2013902 (2014)","journal-title":"Comput. Speech Lang."},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Weninger, F., Watanabe, S., Tachioka, Y., Schuller, B.: Deep recurrent de-noising auto-encoder and blind de-reverberation for reverberated speech recognition. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4623\u20134627. IEEE (2014)","DOI":"10.1109\/ICASSP.2014.6854478"},{"key":"24_CR42","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1016\/j.eswa.2016.08.018","volume":"64","author":"K Wu","year":"2016","unstructured":"Wu, K., Zhang, D., Lu, G.: iPEEH: Improving pitch estimation by enhancing harmonics. Expert Syst. Appl. 64, 317\u2013329 (2016)","journal-title":"Expert Syst. Appl."}],"container-title":["Communications in Computer and Information Science","High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-41005-6_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,15]],"date-time":"2022-10-15T11:12:17Z","timestamp":1665832337000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-41005-6_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030410049","9783030410056"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-41005-6_24","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"12 February 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CARLA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Latin American High Performance Computing Conference","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turrialba","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Costa Rica","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"carla2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/carla2019.ccarla.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OCS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"62","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"52% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}