{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T17:53:33Z","timestamp":1770486813437,"version":"3.49.0"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2019,2,26]],"date-time":"2019-02-26T00:00:00Z","timestamp":1551139200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2019,10]]},"DOI":"10.1007\/s00034-019-01072-7","type":"journal-article","created":{"date-parts":[[2019,2,26]],"date-time":"2019-02-26T11:28:20Z","timestamp":1551180500000},"page":"4667-4682","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Role of Linear, Mel and Inverse-Mel Filterbanks in Automatic Recognition of Speech from High-Pitched Speakers"],"prefix":"10.1007","volume":"38","author":[{"given":"Hemant Kumar","family":"Kathania","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3916-9693","authenticated-orcid":false,"given":"S.","family":"Shahnawazuddin","sequence":"additional","affiliation":[]},{"given":"Waquar","family":"Ahmad","sequence":"additional","affiliation":[]},{"given":"Nagaraj","family":"Adiga","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,2,26]]},"reference":[{"key":"1072_CR1","doi-asserted-by":"crossref","unstructured":"W. Ahmad, S. Shahnawazuddin, H.K. Kathania, G. Pradhan, A.B. Samaddar, Improving children\u2019s speech recognition through explicit pitch scaling based on iterative spectrogram inversion. in Proceedings of INTERSPEECH (2017)","DOI":"10.21437\/Interspeech.2017-302"},{"key":"1072_CR2","doi-asserted-by":"crossref","unstructured":"A. Batliner, M. Blomberg, S.D\u2019Arcy, D. Elenius, D. Giuliani, M. Gerosa, C. Hacker, M. Russell, M. Wong, The $$\\text{PF}\\_\\text{ STAR }$$ PF _ STAR children\u2019s speech corpus. in Proceedings of INTERSPEECH, pp. 2761\u20132764 (2005)","DOI":"10.21437\/Interspeech.2005-705"},{"key":"1072_CR3","unstructured":"D. Byrd, S. Yildirim, S. Narayanan, S. Khurana, Acoustic analysis of preschool children\u2019s speech. in Proceedings of 15th ICPhS Barcelona, pp. 949\u2013952 (2003)"},{"issue":"11","key":"1072_CR4","first-page":"2554","volume":"2","author":"S Chakroborty","year":"2008","unstructured":"S. Chakroborty, A. Roy, G. Saha, Improved closed set text-independent speaker identification by combining MFCC with evidence from flipped filter banks. Int. J. Electr. Comput. Energ. Electron. Commun. Eng. 2(11), 2554\u20132561 (2008)","journal-title":"Int. J. Electr. Comput. Energ. Electron. Commun. Eng."},{"issue":"1","key":"1072_CR5","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"G Dahl","year":"2012","unstructured":"G. Dahl, D. Yu, L. Deng, A. Acero, Context-dependent pre-trained deep neural networks for large vocabulary speech recognition. IEEE Trans. Speech Audio Process. 20(1), 30\u201342 (2012)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"1072_CR6","doi-asserted-by":"crossref","unstructured":"S. D\u2019Arcy, M. Russell, A comparison of human and computer recognition accuracy for children\u2019s speech. in Proceedings of INTERSPEECH, pp. 2187\u20132200 (2005)","DOI":"10.21437\/Interspeech.2005-697"},{"issue":"4","key":"1072_CR7","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"S. Davis, P. Mermelstein, Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans. Acoust. Speech Signal Process. 28(4), 357\u2013366 (1980)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"3","key":"1072_CR8","doi-asserted-by":"publisher","first-page":"508","DOI":"10.1109\/TASL.2008.916519","volume":"16","author":"G Garau","year":"2008","unstructured":"G. Garau, S. Renals, Combining spectral representations for large-vocabulary continuous speech recognition. IEEE Trans. Speech Audio Process. 16(3), 508\u2013518 (2008)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"1072_CR9","doi-asserted-by":"crossref","unstructured":"M. Gerosa, D. Giuliani, S. Narayanan, A. Potamianos, A review of ASR technologies for children\u2019s speech. in Proceedings of Workshop on Child, Computer and Interaction, pp. 7:1\u20137:8 (2009)","DOI":"10.1145\/1640377.1640384"},{"key":"1072_CR10","unstructured":"S. Ghai, Addressing pitch mismatch for children\u2019s automatic speech recognition. Ph.D. thesis, Department of EEE, Indian Institute of Technology Guwahati, India, 2011"},{"key":"1072_CR11","doi-asserted-by":"crossref","unstructured":"S. Ghai, R. Sinha, A study on the effect of pitch on LPCC and PLPC features for children\u2019s ASR in comparison to MFCC. in Proceedings of INTERSPEECH, pp. 2589\u20132592 (2011)","DOI":"10.21437\/Interspeech.2011-662"},{"key":"1072_CR12","doi-asserted-by":"crossref","unstructured":"S. Ghai, R. Sinha, Analyzing pitch robustness of PMVDR and MFCC features for children\u2019s speech recognition. in Proceedings of Signal Processing and Communications (SPCOM) (2010)","DOI":"10.1109\/SPCOM.2010.5560549"},{"key":"1072_CR13","doi-asserted-by":"crossref","unstructured":"S. Ghai, R. Sinha, Exploring the role of spectral smoothing in context of children\u2019s speech recognition. in Proceedings of INTERSPEECH, pp. 1607\u20131610 (2009)","DOI":"10.21437\/Interspeech.2009-209"},{"issue":"6","key":"1072_CR14","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","volume":"29","author":"GE Hinton","year":"2012","unstructured":"G.E. Hinton, L. Deng, D. Yu, G. Dahl, A.R. Mohamed, N. Jaitly, A. Senior, V. Vanhoucke, P. Nguyen, T. Sainath, B. Kingsbury, Deep neural networks for acoustic modeling in speech recognition. Signal Process. Mag. 29(6), 82\u201397 (2012)","journal-title":"Signal Process. Mag."},{"key":"1072_CR15","doi-asserted-by":"crossref","unstructured":"H.K. Kathania, S. Shahnawazuddin, N. Adiga, W. Ahmad, Role of prosodic features on children\u2019s speech recognition. in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 5519\u20135523 (2018)","DOI":"10.1109\/ICASSP.2018.8461668"},{"issue":"5","key":"1072_CR16","doi-asserted-by":"publisher","first-page":"2021","DOI":"10.1007\/s00034-017-0652-0","volume":"37","author":"HK Kathania","year":"2017","unstructured":"H.K. Kathania, W. Ahmad, S. Shahnawazuddin, A.B. Samaddar, Explicit pitch mapping for improved children\u2019s speech recognition. Circuits Syst. Signal Process. 37(5), 2021\u20132044 (2017)","journal-title":"Circuits Syst. Signal Process."},{"issue":"5","key":"1072_CR17","doi-asserted-by":"publisher","first-page":"713","DOI":"10.1007\/s12046-011-0043-3","volume":"36","author":"H Kawahara","year":"2011","unstructured":"H. Kawahara, M. Morise, Technical foundations of TANDEM-STRAIGHTs, a speech analysis, modification and synthesis framework. Sadhana 36(5), 713\u2013727 (2011)","journal-title":"Sadhana"},{"issue":"3","key":"1072_CR18","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/S0167-6393(98)00085-5","volume":"27","author":"H Kawahara","year":"1999","unstructured":"H. Kawahara, I. Masuda-Katsuse, A. De Cheveign\u00e9, Restructuring speech representations using a pitch-adaptive time\u2013frequency smoothing and an instantaneous-frequency-based f0 extraction: Possible role of a repetitive structure in sounds. Speech Commun. 27(3), 187\u2013207 (1999)","journal-title":"Speech Commun."},{"key":"1072_CR19","first-page":"421","volume":"9","author":"RD Kent","year":"1976","unstructured":"R.D. Kent, Anatomical and neuromuscular maturation of the speech mechanism: evidence from acoustic studies. JHSR 9, 421\u2013447 (1976)","journal-title":"JHSR"},{"issue":"1","key":"1072_CR20","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1109\/89.650310","volume":"6","author":"L Lee","year":"1998","unstructured":"L. Lee, R. Rose, A frequency warping approach to speaker normalization. IEEE Trans. Speech Audio Process. 6(1), 49\u201360 (1998)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"1072_CR21","doi-asserted-by":"crossref","unstructured":"H. Lei, E. Gonzalo, Mel, linear, and antimel frequency cepstral coefficients in broad phonetic regions for telephone speaker recognition. in Proceedings of INTERSPEECH, pp. 2323\u20132326 (2009)","DOI":"10.21437\/Interspeech.2009-389"},{"key":"1072_CR22","doi-asserted-by":"crossref","unstructured":"H. Liao, G. Pundak, O. Siohan, M.K. Carroll, N. Coccaro, Q. Jiang, T.N. Sainath, A.W. Senior, F. Beaufays, M. Bacchiani, Large vocabulary automatic speech recognition for children. in Proceedings of INTERSPEECH, pp. 1611\u20131615 (2015)","DOI":"10.21437\/Interspeech.2015-373"},{"key":"1072_CR23","doi-asserted-by":"crossref","unstructured":"A. Metallinou, J. Cheng, Using deep neural networks to improve proficiency assessment for children English language learners. in Proceedings of INTERSPEECH, pp. 1468\u20131472 (2014)","DOI":"10.21437\/Interspeech.2014-358"},{"key":"1072_CR24","unstructured":"D. Povey, A. Ghoshal, G. Boulianne, L. Burget, O. Glembek, N. Goel, M. Hannemann, P. Motlicek, Y. Qian, P. Schwarz, J. Silovsky, G. Stemmer, K. Vesely, The Kaldi speech recognition toolkit. in Proceedings of ASRU (2011)"},{"key":"1072_CR25","unstructured":"M.R. Qun Li, An analysis of the causes of increased error rates in children\u2019s speech recognition. in Proceedings of ICSLP2002, Sept 2002"},{"key":"1072_CR26","doi-asserted-by":"crossref","unstructured":"S.P. Rath, D. Povey, K. Vesel\u00fd, J. \u010cernock\u00fd, Improved feature processing for deep neural networks. in Proceedings of INTERSPEECH (2013)","DOI":"10.21437\/Interspeech.2013-48"},{"key":"1072_CR27","unstructured":"T. Robinson, J. Fransen, D. Pye, J. Foote, S. Renals, WSJCAM0: A British English speech corpus for large vocabulary continuous speech recognition. in Proceedings of ICASSP, vol. 1, pp. 81\u201384 (1995)"},{"key":"1072_CR28","unstructured":"A. Roy, G. Saha, S. Majumdar, S. Chakroborty, Capturing complementary information via reversed filter bank and parallel implementation with MFCC for improved text-independent speaker identification. in Proceedings of International Conference on Computing: Theory and Applications(ICCTA), pp. 463\u2013467 (2007)"},{"key":"1072_CR29","doi-asserted-by":"crossref","unstructured":"M. Russell, S. D\u2019Arcy, Challenges for computer recognition of children\u2019s speech. in Proceedings of Speech and Language Technologies in Education (SLaTE) (2007)","DOI":"10.21437\/SLaTE.2007-26"},{"issue":"12","key":"1072_CR30","doi-asserted-by":"publisher","first-page":"1044","DOI":"10.1109\/LSP.2007.906213","volume":"14","author":"M Russell","year":"2007","unstructured":"M. Russell, S. D\u2019Arcy, L. Qun, The effects of bandwidth reduction on human and computer recognition of children\u2019s speech. IEEE Signal Process. Lett. 14(12), 1044\u20131046 (2007)","journal-title":"IEEE Signal Process. Lett."},{"key":"1072_CR31","doi-asserted-by":"crossref","unstructured":"R. Serizel, D. Giuliani, Vocal tract length normalisation approaches to DNN-based children\u2019s and adults\u2019 speech recognition. in Proceedings of Spoken Language Technology Workshop (SLT), pp. 135\u2013140 (2014)","DOI":"10.1109\/SLT.2014.7078563"},{"issue":"3","key":"1072_CR32","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1017\/S135132491600005X","volume":"23","author":"R Serizel","year":"2016","unstructured":"R. Serizel, D. Giuliani, Deep-neural network approaches for speech recognition with heterogeneous groups of speakers including children. Nat. Lang. Eng. 23(3), 325\u2013350 (2016)","journal-title":"Nat. Lang. Eng."},{"key":"1072_CR33","doi-asserted-by":"crossref","unstructured":"S. Shahnawazuddin, A. Dey, R. Sinha, Pitch-adaptive front-end features for robust children\u2019s ASR. in Proceedings of INTERSPEECH (2016)","DOI":"10.21437\/Interspeech.2016-1020"},{"key":"1072_CR34","doi-asserted-by":"crossref","unstructured":"S. Shahnawazuddin, H. Kathania, R. Sinha, Enhancing the recognition of children\u2019s speech on acoustically mismatched ASR system. in Proceedings of TENCON (2015)","DOI":"10.1109\/TENCON.2015.7373176"},{"key":"1072_CR35","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1016\/j.dsp.2018.05.003","volume":"79","author":"S Shahnawazuddin","year":"2018","unstructured":"S. Shahnawazuddin, N. Adiga, H.K. Kathania, G. Pradhan, R. Sinha, Studying the role of pitch-adaptive spectral estimation and speaking-rate normalization in automatic speech recognition. Digit. Signal Process. 79, 142\u2013151 (2018)","journal-title":"Digit. Signal Process."},{"key":"1072_CR36","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1016\/j.specom.2018.11.001","volume":"105","author":"S Shahnawazuddin","year":"2018","unstructured":"S. Shahnawazuddin, H.K. Kathania, A. Dey, R. Sinha, Improving children\u2019s mismatched asr using structured low-rank feature projection. Speech Commun. 105, 103\u2013113 (2018)","journal-title":"Speech Commun."},{"key":"1072_CR37","doi-asserted-by":"crossref","unstructured":"R. Sinha, S. Ghai, On the use of pitch normalization for improving children\u2019s speech recognition. in Proceedings of INTERSPEECH, pp. 568\u2013571 (2009)","DOI":"10.21437\/Interspeech.2009-202"},{"key":"1072_CR38","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1016\/j.csl.2017.10.007","volume":"48","author":"R Sinha","year":"2018","unstructured":"R. Sinha, S. Shahnawazuddin, Assessment of pitch-adaptive front-end signal processing for children\u2019s speech recognition. Comput. Speech Lang. 48, 103\u2013121 (2018)","journal-title":"Comput. Speech Lang."},{"issue":"5","key":"1072_CR39","first-page":"525","volume":"7","author":"R Vergin","year":"1999","unstructured":"R. Vergin, D. O\u2019Shaughnessy, A. Farhat, Generalized Mel frequency cepstral coefficients for large-vocabulary speaker-independent continuous-speech recognition. IEEE Trans. ASSP 7(5), 525\u2013532 (1999)","journal-title":"IEEE Trans. ASSP"},{"key":"1072_CR40","doi-asserted-by":"crossref","unstructured":"X. Zhou, D. Garcia-Romero, R. Duraiswami, C. Espy-Wilson, S. Shamma, Linear versus Mel frequency cepstral coefficients for speaker recognition. in Proceedings of ASRU, pp. 559\u2013564 (2011)","DOI":"10.1109\/ASRU.2011.6163888"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00034-019-01072-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-019-01072-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-019-01072-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T03:52:16Z","timestamp":1721015536000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00034-019-01072-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2,26]]},"references-count":40,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2019,10]]}},"alternative-id":["1072"],"URL":"https:\/\/doi.org\/10.1007\/s00034-019-01072-7","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,2,26]]},"assertion":[{"value":"9 July 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 February 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 February 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 February 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}