{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T18:52:41Z","timestamp":1777143161442,"version":"3.51.4"},"reference-count":32,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,2,10]],"date-time":"2024-02-10T00:00:00Z","timestamp":1707523200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,10]],"date-time":"2024-02-10T00:00:00Z","timestamp":1707523200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s00034-023-02592-z","type":"journal-article","created":{"date-parts":[[2024,2,11]],"date-time":"2024-02-11T00:02:21Z","timestamp":1707609741000},"page":"3020-3041","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Enhancing Children\u2019s Short Utterance-Based ASV Using Inverse Gamma-tone Filtered Cepstral coefficients"],"prefix":"10.1007","volume":"43","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5415-8864","authenticated-orcid":false,"given":"Shahid","family":"Aziz","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"S.","family":"Shahnawazuddin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,2,10]]},"reference":[{"key":"2592_CR1","doi-asserted-by":"crossref","unstructured":"A. Batliner, M. Blomberg, S. D\u2019Arcy, D. Elenius, D. Giuliani, M. Gerosa, C. Hacker, M. Russell, M. Wong, The PF_STAR children\u2019s speech corpus. Proceedings INTERSPEECH, pp. 2761\u20132764 (2005)","DOI":"10.21437\/Interspeech.2005-705"},{"issue":"12","key":"2592_CR2","doi-asserted-by":"publisher","first-page":"1293","DOI":"10.3390\/app7121293","volume":"7","author":"EP Damsk\u00e4gg","year":"2017","unstructured":"E.P. Damsk\u00e4gg, V. V\u00e4lim\u00e4ki, Audio time stretching using fuzzy classification of spectral bins. Appl. Sci. 7(12), 1293 (2017)","journal-title":"Appl. Sci."},{"issue":"4","key":"2592_CR3","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"S. Davis, P. Mermelstein, Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans. Acoust. Speech Signal Process. 28(4), 357\u2013366 (1980). https:\/\/doi.org\/10.1109\/TASSP.1980.1163420","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"6","key":"2592_CR4","doi-asserted-by":"publisher","first-page":"1504","DOI":"10.1109\/TASL.2010.2092766","volume":"19","author":"D Dimitriadis","year":"2010","unstructured":"D. Dimitriadis, P. Maragos, A. Potamianos, On the effects of filterbank design and energy computation on robust speech recognition. IEEE Trans. Audio Speech Lang. Process. 19(6), 1504\u20131516 (2010)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"2592_CR5","unstructured":"M. Eskenazi, J. Mostow, D. Graff, The CMU Kids Corpus LDC97S63. https:\/\/catalog.ldc.upenn.edu\/LDC97S63 (1997)"},{"issue":"13","key":"2592_CR6","doi-asserted-by":"publisher","first-page":"16721","DOI":"10.1007\/s11042-017-5237-1","volume":"77","author":"M Fedila","year":"2018","unstructured":"M. Fedila, M. Bengherabi, A. Amrouche, Gammatone filterbank and symbiotic combination of amplitude and phase-based spectra for robust speaker verification under noisy conditions and compression artifacts. Multimed. Tools Appl. 77(13), 16721\u201316739 (2018)","journal-title":"Multimed. Tools Appl."},{"key":"2592_CR7","doi-asserted-by":"crossref","unstructured":"M. Gerosa, D. Giuliani, S. Narayanan, A. Potamianos, A review of ASR technologies for children\u2019s speech. Proceeding Workshop on Child, Computer and Interaction, pp. 7:1\u20137:8 (2009)","DOI":"10.1145\/1640377.1640384"},{"key":"2592_CR8","doi-asserted-by":"crossref","unstructured":"B. Gold, N. Morgan, D. Ellis, Speech and audio signal processing: processing and perception of speech and music (Wiley, 2011)","DOI":"10.1002\/9781118142882"},{"key":"2592_CR9","doi-asserted-by":"publisher","unstructured":"B. Gold, N. Morgan, D. Ellis, D. O\u2019Shaughnessy, Speech and audio signal processing: processing and perception of speech and music, second edition. J. Acoust. Soc. Am. 132, 1861\u20132 (2012). https:\/\/doi.org\/10.1121\/1.4742973","DOI":"10.1121\/1.4742973"},{"key":"2592_CR10","doi-asserted-by":"crossref","unstructured":"T. Kaneko, H. Kameoka, Parallel-data-free voice conversion using cycle-consistent adversarial networks. arXiv preprint arXiv:1711.11293 (2017)","DOI":"10.23919\/EUSIPCO.2018.8553236"},{"key":"2592_CR11","doi-asserted-by":"crossref","unstructured":"H.K. Kathania, S.R. Kadiri, P. Alku, M. Kurimo, Study of formant modification for children asr. ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), (IEEE, 2020), pp. 7429\u20137433","DOI":"10.1109\/ICASSP40776.2020.9053334"},{"issue":"10","key":"2592_CR12","doi-asserted-by":"publisher","first-page":"4667","DOI":"10.1007\/s00034-019-01072-7","volume":"38","author":"HK Kathania","year":"2019","unstructured":"H.K. Kathania, S. Shahnawazuddin, W. Ahmad, N. Adiga, Role of linear, mel and inverse-mel filterbanks in automatic recognition of speech from high-pitched speakers. Circuits Syst. Signal Process. 38(10), 4667\u20134682 (2019)","journal-title":"Circuits Syst. Signal Process."},{"issue":"4","key":"2592_CR13","doi-asserted-by":"publisher","first-page":"2205","DOI":"10.1007\/s00034-021-01885-5","volume":"41","author":"V Kumar","year":"2022","unstructured":"V. Kumar, A. Kumar, S. Shahnawazuddin, Creating robust children\u2019s ASR system in zero-resource condition through out-of-domain data augmentation. Circuits Syst. Signal Process. 41(4), 2205\u20132220 (2022)","journal-title":"Circuits Syst. Signal Process."},{"issue":"3","key":"2592_CR14","doi-asserted-by":"publisher","first-page":"1455","DOI":"10.1121\/1.426686","volume":"105","author":"S Lee","year":"1999","unstructured":"S. Lee, A. Potamianos, S.S. Narayanan, Acoustics of children\u2019s speech: developmental changes of temporal and spectral parameters. J. Acoust. Soc. Am. 105(3), 1455\u20131468 (1999)","journal-title":"J. Acoust. Soc. Am."},{"key":"2592_CR15","unstructured":"R.D. Patterson, I. Nimmo-Smith, J. Holdsworth, P. Rice, An efficient auditory filterbank based on the gammatone function. A meeting of the IOC Speech Group on Auditory Modelling at RSRE, vol.\u00a02 (1987)"},{"key":"2592_CR16","doi-asserted-by":"crossref","unstructured":"V. Peddinti, D. Povey, S. Khudanpur, A time delay neural network architecture for efficient modeling of long temporal contexts. Proceedings INTERSPEECH (2015)","DOI":"10.21437\/Interspeech.2015-647"},{"issue":"2","key":"2592_CR17","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1049\/iet-bmt.2017.0065","volume":"7","author":"A Poddar","year":"2018","unstructured":"A. Poddar, M. Sahidullah, G. Saha, Speaker verification with short utterances: a review of challenges, trends and opportunities. IET Biom. 7(2), 91\u2013101 (2018)","journal-title":"IET Biom."},{"key":"2592_CR18","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1016\/j.dsp.2019.01.023","volume":"88","author":"A Poddar","year":"2019","unstructured":"A. Poddar, M. Sahidullah, G. Saha, Quality measures for speaker verification with short utterances. Digit. Signal Process. 88, 66\u201379 (2019). https:\/\/doi.org\/10.1016\/j.dsp.2019.01.023","journal-title":"Digit. Signal Process."},{"key":"2592_CR19","unstructured":"D. Povey, A. Ghoshal, G. Boulianne, L. Burget, O. Glembek, N. Goel, M. Hannemann, P. Motlicek, Y. Qian, P. Schwarz, J. Silovsky, G. Stemmer, K. Vesely, The Kaldi Speech recognition toolkit. Proceedings ASRU (2011)"},{"key":"2592_CR20","unstructured":"D. Povey, X. Zhang, S. Khudanpur, Parallel training of deep neural networks with natural gradient and parameter averaging. Proceedings ICLR (2015)"},{"key":"2592_CR21","doi-asserted-by":"crossref","unstructured":"S.R.M. Prasanna, D. Govind, K.S. Rao, B. Yegnanarayana, Fast prosody modification using instants of significant excitation. Proceedings International Conference on Speech Prosody (2010)","DOI":"10.21437\/SpeechProsody.2010-126"},{"key":"2592_CR22","doi-asserted-by":"crossref","unstructured":"P. Rajan, T. Kinnunen, C. Hanilci, J. Pohjalainen, P. Alku, Using group delay functions from all-pole models for speaker recognition. INTERSPEECH, pp. 2489\u20132493 (2013)","DOI":"10.21437\/Interspeech.2013-416"},{"key":"2592_CR23","first-page":"81","volume":"1","author":"T Robinson","year":"1995","unstructured":"T. Robinson, J. Fransen, D. Pye, J. Foote, S. Renals, WSJCAM0: A British English speech corpus for large vocabulary continuous speech recognition. Proceedings ICASSP 1, 81\u201384 (1995)","journal-title":"Proceedings ICASSP"},{"key":"2592_CR24","doi-asserted-by":"crossref","unstructured":"M. Russell, S. D\u2019Arcy, Challenges for computer recognition of children\u2019s speech. Proceedings Speech and Language Technologies in Education (SLaTE) (2007)","DOI":"10.21437\/SLaTE.2007-26"},{"key":"2592_CR25","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1016\/j.csl.2018.01.001","volume":"50","author":"S Safavi","year":"2018","unstructured":"S. Safavi, M. Russell, P. Jan\u010dovi\u010d, Automatic speaker, age-group and gender identification from children\u2019s speech. Comput. Speech Lang. 50, 141\u2013156 (2018)","journal-title":"Comput. Speech Lang."},{"key":"2592_CR26","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1016\/j.patrec.2019.12.019","volume":"131","author":"S Shahnawazuddin","year":"2020","unstructured":"S. Shahnawazuddin, N. Adiga, H.K. Kathania, B.T. Sai, Creating speaker independent ASR system through prosody modification based data augmentation. Pattern Recogn. Lett. 131, 213\u2013218 (2020). https:\/\/doi.org\/10.1016\/j.patrec.2019.12.019","journal-title":"Pattern Recogn. Lett."},{"key":"2592_CR27","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1016\/j.dsp.2019.06.015","volume":"93","author":"S Shahnawazuddin","year":"2019","unstructured":"S. Shahnawazuddin, N. Adiga, B.T. Sai, W. Ahmad, H.K. Kathania, Developing speaker independent ASR system using limited data through prosody modification based on fuzzy classification of spectral bins. Digit. Signal Process. 93, 34\u201342 (2019)","journal-title":"Digit. Signal Process."},{"key":"2592_CR28","doi-asserted-by":"crossref","unstructured":"S. Shahnawazuddin, W. Ahmad, N. Adiga, A. Kumar, In-domain and out-of-domain data augmentation to improve children\u2019s speaker verification system in limited data scenario. In: ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7554\u20137558 (2020)","DOI":"10.1109\/ICASSP40776.2020.9053891"},{"key":"2592_CR29","unstructured":"K. Shobaki, J.P. Hosom, R. Cole, Cslu: Kids\u2019 speech version 1.1. Linguistic Data Consortium (2007)"},{"key":"2592_CR30","doi-asserted-by":"crossref","unstructured":"D. Snyder, D. Garcia-Romero, D. Povey, S. Khudanpur, Deep neural network embeddings for text-independent speaker verification. Proceedings INTERSPEECH, pp. 999\u20131003 (2017)","DOI":"10.21437\/Interspeech.2017-620"},{"key":"2592_CR31","doi-asserted-by":"crossref","unstructured":"D. Snyder, D. Garcia-Romero, G. Sell, D. Povey, S. Khudanpur, X-Vectors: Robust DNN Embeddings for Speaker Recognition. Proceedings ICASSP, pp. 5329\u20135333 (2018)","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"2592_CR32","doi-asserted-by":"crossref","unstructured":"G. Yeung, A. Alwan, On the difficulties of automatic speech recognition for kindergarten-aged children. Interspeech 2018 (2018)","DOI":"10.21437\/Interspeech.2018-2297"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02592-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-023-02592-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-023-02592-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,30]],"date-time":"2024-03-30T12:03:26Z","timestamp":1711800206000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-023-02592-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,10]]},"references-count":32,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["2592"],"URL":"https:\/\/doi.org\/10.1007\/s00034-023-02592-z","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,10]]},"assertion":[{"value":"13 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 December 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 December 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 February 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The work presented in the uploaded manuscript is an original one, and the manuscript is not currently under consideration for publication elsewhere.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"It is hereby confirmed that the manuscript has been read and approved for submission by all the named authors. It is therefore requested to consider the submitted manuscript for publication in the esteemed journal.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for Publication"}}]}}