{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T15:42:38Z","timestamp":1780501358071,"version":"3.54.1"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2019,5,3]],"date-time":"2019-05-03T00:00:00Z","timestamp":1556841600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1007\/s00034-019-01130-0","type":"journal-article","created":{"date-parts":[[2019,5,3]],"date-time":"2019-05-03T11:09:05Z","timestamp":1556881745000},"page":"3501-3520","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["A Comparative Study of Deep Learning Techniques on Frame-Level Speech Data Classification"],"prefix":"10.1007","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0877-9456","authenticated-orcid":false,"given":"Abdolreza","family":"Sabzi Shahrebabaki","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ali Shariq","family":"Imran","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Negar","family":"Olfati","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Torbj\u00f8rn","family":"Svendsen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2019,5,3]]},"reference":[{"issue":"10","key":"1130_CR1","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"O. Abdel-Hamid, A. Mohamed, H. Jiang, L. Deng, G. Penn, D. Yu, Convolutional neural networks for speech recognition. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(10), 1533\u20131545 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"1130_CR2","unstructured":"E. Arisoy, T.N. Sainath, B. Kingsbury, B. Ramabhadran, Deep neural network language models, in Proceedings of the NAACL-HLT 2012 Workshop: Will We Ever Really Replace the N-gram Model? On the Future of Language Modeling for HLT, WLM \u201912 (Association for Computational Linguistics, Stroudsburg, PA, USA, 2012), pp. 20\u201328"},{"issue":"4","key":"1130_CR3","doi-asserted-by":"publisher","first-page":"555","DOI":"10.1162\/coli.07-034-R2","volume":"34","author":"R Artstein","year":"2008","unstructured":"R. Artstein, M. Poesio, Inter-coder agreement for computational linguistics. Comput. Linguist. 34(4), 555\u2013596 (2008)","journal-title":"Comput. Linguist."},{"issue":"5","key":"1130_CR4","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1111\/j.1467-9280.2009.02327.x","volume":"20","author":"CT Best","year":"2009","unstructured":"C.T. Best, M.D. Tyler, T.N. Gooding, C.B. Orlando, C.A. Quann, Development of phonological constancy: Toddlers\u2019 perception of native- and Jamaican-accented words. Psychol. Sci. 20(5), 539\u2013542 (2009)","journal-title":"Psychol. Sci."},{"key":"1130_CR5","doi-asserted-by":"crossref","unstructured":"T. Brondsted, J.P. Madsen, Analysis of speaking rate variations in stress-timed languages, in EUROSPEECH\u20131997, pp. 481\u2013484 (1997)","DOI":"10.21437\/Eurospeech.1997-163"},{"issue":"1","key":"1130_CR6","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"GE Dahl","year":"2012","unstructured":"G.E. Dahl, D. Yu, L. Deng, A. Acero, Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition. IEEE Trans. Audio Speech Lang. Process. 20(1), 30\u201342 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"1130_CR7","doi-asserted-by":"crossref","unstructured":"M. Exter, B.T. Meyer, DNN-based automatic speech recognition as a model for human phoneme perception, in INTERSPEECH\u20132016, pp. 615\u2013619 (2016)","DOI":"10.21437\/Interspeech.2016-1285"},{"key":"1130_CR8","doi-asserted-by":"crossref","unstructured":"R. Faltlhauser, G. Ruske, M. Thomae, Towards the question: why has speaking rate such an impact on speech recognition performance? in ICSLP\u20132002, pp. 2429\u20132432 (2002)","DOI":"10.21437\/ICSLP.2002-633"},{"key":"1130_CR9","unstructured":"A.L. Francis, H.C. Nusbaum, Paying attention to speaking rate, in ICSLP\u20131996, vol. 3, pp. 1537\u20131540 (1996)"},{"key":"1130_CR10","doi-asserted-by":"crossref","unstructured":"M. Grimaldi, F. Cummins, Speech style and speaker recognition: a case study, in INTERSPEECH\u20132009, pp. 920\u2013923 (2009)","DOI":"10.21437\/Interspeech.2009-276"},{"key":"1130_CR11","doi-asserted-by":"crossref","unstructured":"F.A. Laleye, E.C. Ezin, C. Motamed, Adaptive decision-level fusion for Fongbe phoneme classification using fuzzy logic and deep belief networks, in 2015 12th International Conference on Informatics in Control, Automation and Robotics (ICINCO), vol.\u00a01 (IEEE 2015), pp. 15\u201324","DOI":"10.5220\/0005536100150024"},{"key":"1130_CR12","doi-asserted-by":"publisher","unstructured":"F. Martinez, D. Tapias, J. Alvarez, Towards speech rate independence in large vocabulary continuous speech recognition, in Proceedings of the 1998 IEEE International Conference on Acoustics, Speech and Signal Processing, 1998, vol.\u00a02, pp. 725\u2013728 (1998). https:\/\/doi.org\/10.1109\/ICASSP.1998.675367","DOI":"10.1109\/ICASSP.1998.675367"},{"key":"1130_CR13","doi-asserted-by":"crossref","unstructured":"F. Martinez, D. Tapias, J. Alvarez, P. Leon, Characteristics of slow, average and fast speech and their effects in large vocabulary continuous speech recognition, in EUROSPEECH\u20131997, pp. 469\u2013472 (1997)","DOI":"10.21437\/Eurospeech.1997-160"},{"key":"1130_CR14","doi-asserted-by":"publisher","unstructured":"A. Meftah, Y.A. Alotaibi, S. Selouani, A comparative study of different speech features for arabic phonemes classification, in 2016 European Modelling Symposium (EMS), pp. 47\u201352 (2016). https:\/\/doi.org\/10.1109\/EMS.2016.018","DOI":"10.1109\/EMS.2016.018"},{"key":"1130_CR15","unstructured":"B. Meyer, T. Wesker, T. Brand, A. Mertins, B. Kollmeier, A human-machine comparison in speech recognition based on a logatome corpus, in Speech Recognition and Intrinsic Variation Workshop (2006)"},{"issue":"1","key":"1130_CR16","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1121\/1.3514525","volume":"129","author":"BT Meyer","year":"2011","unstructured":"B.T. Meyer, T. Brand, B. Kollmeier, Effect of speech-intrinsic variations on human and automatic recognition of spoken phonemes. J. Acoust. Soc. Am. 129(1), 388\u2013403 (2011)","journal-title":"J. Acoust. Soc. Am."},{"key":"1130_CR17","doi-asserted-by":"crossref","unstructured":"B.T. Meyer, M. W\u00e4chter, T. Brand, B. Kollmeier, Phoneme confusions in human and automatic speech recognition, in Eighth Annual Conference of the International Speech Communication Association (2007)","DOI":"10.21437\/Interspeech.2007-430"},{"issue":"1","key":"1130_CR18","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/TASL.2011.2109382","volume":"20","author":"AR Mohamed","year":"2012","unstructured":"A.R. Mohamed, G.E. Dahl, G. Hinton, Acoustic modeling using deep belief networks. IEEE Trans. Audio Speech Lang. Process. 20(1), 14\u201322 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"1130_CR19","doi-asserted-by":"crossref","unstructured":"T. Pfau, G. Ruske, Creating hidden Markov models for fast speech, in ICSLP\u20131998, pp. 205\u2013208 (1998)","DOI":"10.21437\/ICSLP.1998-231"},{"key":"1130_CR20","doi-asserted-by":"crossref","unstructured":"A. Rozi, L. Li, D. Wang, T.F. Zheng, Feature transformation for speaker verification under speaking rate mismatch condition, in Signal and Information Processing Association Annual Summit and Conference (APSIPA), 2016 Asia-Pacific (IEEE, 2016), pp. 1\u20134","DOI":"10.1109\/APSIPA.2016.7820824"},{"key":"1130_CR21","doi-asserted-by":"publisher","unstructured":"F. Seide, G. Li, X. Chen, D. Yu, Feature engineering in context-dependent deep neural networks for conversational speech transcription, in 2011 IEEE Workshop on Automatic Speech Recognition Understanding, pp. 24\u201329 (2011). https:\/\/doi.org\/10.1109\/ASRU.2011.6163899","DOI":"10.1109\/ASRU.2011.6163899"},{"key":"1130_CR22","doi-asserted-by":"crossref","unstructured":"A.S. Shahrebabaki, A.S. Imran, N. Olfati, T. Svendsen, Acoustic feature comparison for different speaking rates, in Human\u2013Computer Interaction. Interaction Technologies (Springer International Publishing, 2018), pp. 176\u2013189","DOI":"10.1007\/978-3-319-91250-9_14"},{"key":"1130_CR23","doi-asserted-by":"crossref","unstructured":"S. Theodoridis, Chapter 18\u2014neural networks and deep learning, in Machine Learning (Academic Press, Oxford 2015), pp. 875\u2013936","DOI":"10.1016\/B978-0-12-801522-3.00018-5"},{"key":"1130_CR24","doi-asserted-by":"crossref","unstructured":"D. Varghese, D. Mathew, Phoneme classification using reservoirs with MFCC and RASTA-PLP features, in 2016 International Conference on Computer Communication and Informatics (ICCCI), pp. 1\u20136 (2016)","DOI":"10.1109\/ICCCI.2016.7480007"},{"key":"1130_CR25","doi-asserted-by":"crossref","unstructured":"T. Wesker, B. Meyer, K. Wagener, J. Anem\u00fcller, A. Mertins, B. Kollmeier, Oldenburg logatome speech corpus (OLLO) for speech recognition experiments with humans and machines, in Ninth European Conference on Speech Communication and Technology (2005)","DOI":"10.21437\/Interspeech.2005-485"},{"key":"1130_CR26","doi-asserted-by":"crossref","unstructured":"B. Wrede, G.A. Fink, G. Sagerer, An investigation of modelling aspects for ratedependent speech recognition, in EUROSPEECH\u20132001, pp. 2527\u20132530 (2001)","DOI":"10.21437\/Eurospeech.2001-591"},{"key":"1130_CR27","unstructured":"M. Xu, L. Zhang, L. Wang, Database collection for study on speech variation robust speaker recognition, in Proceedings of O-COCOSDA (2008)"},{"key":"1130_CR28","doi-asserted-by":"crossref","unstructured":"X. Zeng, S. Yin, D. Wang, Learning speech rate in speech recognition, in INTERSPEECH-2015, pp. 528\u2013532 (2015)","DOI":"10.21437\/Interspeech.2015-197"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-019-01130-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00034-019-01130-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-019-01130-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,17]],"date-time":"2022-09-17T10:04:55Z","timestamp":1663409095000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00034-019-01130-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,3]]},"references-count":28,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2019,8]]}},"alternative-id":["1130"],"URL":"https:\/\/doi.org\/10.1007\/s00034-019-01130-0","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5,3]]},"assertion":[{"value":"6 June 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 April 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 April 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 May 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}