{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T11:02:58Z","timestamp":1740135778345,"version":"3.37.3"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2018,5,5]],"date-time":"2018-05-05T00:00:00Z","timestamp":1525478400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1007\/s00034-018-0828-2","type":"journal-article","created":{"date-parts":[[2018,5,5]],"date-time":"2018-05-05T10:41:24Z","timestamp":1525516884000},"page":"5540-5553","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["An Experimental Study on the Significance of Variable Frame-Length and Overlap in the Context of Children\u2019s Speech Recognition"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3916-9693","authenticated-orcid":false,"given":"S.","family":"Shahnawazuddin","sequence":"first","affiliation":[]},{"given":"Chaman","family":"Singh","sequence":"additional","affiliation":[]},{"given":"Hemant Kumar","family":"Kathania","sequence":"additional","affiliation":[]},{"given":"Waquar","family":"Ahmad","sequence":"additional","affiliation":[]},{"given":"Gayadhar","family":"Pradhan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,5,5]]},"reference":[{"key":"828_CR1","unstructured":"A. Batliner, M. Blomberg, S. D\u2019Arcy, D. Elenius, D. Giuliani, M. Gerosa, C. Hacker, M. Russell, M. Wong, The PF_STAR children\u2019s speech corpus. In Proceedings INTERSPEECH (2005), pp. 2761\u20132764"},{"key":"828_CR2","unstructured":"G.T. Beauregard, X. Zhu, L. Wyse, An efficient algorithm for real-time spectrogram inversion. In Procedings of the 8th International Conference on Digital Audio Effects (2005), pp. 116\u2013118"},{"key":"828_CR3","unstructured":"D. Burnett, M. Fanty, Rapid unsupervised adaptation to children\u2019s speech on a connected-digit task. In Proceedings ICSLP, vol. 2 (1996), pp. 1145\u20131148"},{"key":"828_CR4","unstructured":"J.P. Cabral, L.C. Oliveira, Pitch-synchronous time-scaling for prosodic and voice quality transformations. In Proceedings INTERSPEECH (2005), pp. 1137\u20131140"},{"key":"828_CR5","unstructured":"S.M. Chu, D. Povey, Speaking rate adaptation using continuous frame rate normalization. In Proceedings ICASSP (2010), pp. 4306\u20134309"},{"issue":"1","key":"828_CR6","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","volume":"20","author":"G Dahl","year":"2012","unstructured":"G. Dahl, D. Yu, L. Deng, A. Acero, Context-dependent pre-trained deep neural networks for large vocabulary speech recognition. IEEE Trans. Speech Audio Process. 20(1), 30\u201342 (2012)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"4","key":"828_CR7","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"S. Davis, P. Mermelstein, Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans. Acoust. Speech Signal Process. 28(4), 357\u2013366 (1980). https:\/\/doi.org\/10.1109\/TASSP.1980.1163420","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"828_CR8","doi-asserted-by":"crossref","unstructured":"M. Gerosa, D. Giuliani, S. Narayanan, A. Potamianos, A review of ASR technologies for children\u2019s speech. In Proceedings of Workshop on Child, Computer and Interaction (2009), pp. 7:1\u20137:8","DOI":"10.1145\/1640377.1640384"},{"key":"828_CR9","unstructured":"S. Ghai, Addressing pitch mismatch for children\u2019s automatic speech recognition. Ph.D. thesis, Department of EEE, Indian Institute of Technology Guwahati, India (2011)"},{"key":"828_CR10","unstructured":"A. Hagen, B. Pellom, R. Cole, Children\u2019s speech recognition with application to interactive books and tutors. In Proceedings ASRU (2003), pp. 186\u2013191"},{"issue":"12","key":"828_CR11","doi-asserted-by":"publisher","first-page":"861","DOI":"10.1016\/j.specom.2007.05.004","volume":"49","author":"A Hagen","year":"2007","unstructured":"A. Hagen, B. Pellom, R. Cole, Highly accurate childrens speech recognition for interactive reading tutors using subword units. Speech Commun. 49(12), 861\u2013873 (2007)","journal-title":"Speech Commun."},{"key":"828_CR12","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1016\/S0095-4470(19)31460-3","volume":"8","author":"R Kent","year":"1980","unstructured":"R. Kent, L. Forner, Speech segment durations in sentence recitations by children and adults. J. Phonet. 8, 157\u2013168 (1980)","journal-title":"J. Phonet."},{"issue":"1","key":"828_CR13","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1109\/89.650310","volume":"6","author":"L Lee","year":"1998","unstructured":"L. Lee, R. Rose, A frequency warping approach to speaker normalization. IEEE Trans. Speech Audio Process. 6(1), 49\u201360 (1998)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"828_CR14","unstructured":"S. Lee, A. Potamianos, S.S. Narayanan, Analysis of children\u2019s speech: duration, pitch and formants. In Proceedings INTERSPEECH, vol. 1 (1997), p. 473\u2013476"},{"issue":"3","key":"828_CR15","doi-asserted-by":"publisher","first-page":"1455","DOI":"10.1121\/1.426686","volume":"105","author":"S Lee","year":"1999","unstructured":"S. Lee, A. Potamianos, S.S. Narayanan, Acoustics of childrens speech: developmental changes of temporal and spectral parameters. J. Acoust. Soc. Am. 105(3), 1455\u20131468 (1999)","journal-title":"J. Acoust. Soc. Am."},{"key":"828_CR16","unstructured":"H. Liao, G. Pundak, O. Siohan, M.K. Carroll, N. Coccaro, Q. Jiang, T.N. Sainath, A.W. Senior, F. Beaufays, M. Bacchiani, Large vocabulary automatic speech recognition for children. In Proceedings INTERSPEECH (2015), pp. 1611\u20131615"},{"key":"828_CR17","unstructured":"J.L. Miller, Effects of speaking rate on segmental distinctions. In Perspectives on the study of speech (1981), pp. 39\u201371"},{"issue":"6","key":"828_CR18","doi-asserted-by":"publisher","first-page":"505","DOI":"10.3758\/BF03208147","volume":"46","author":"JL Miller","year":"1989","unstructured":"J.L. Miller, L.E. Volaitis, Effect of speaking rate on the perceptual structure of a phonetic category. Percept. Psychophys. 46(6), 505\u2013512 (1989)","journal-title":"Percept. Psychophys."},{"key":"828_CR19","unstructured":"N. Mirghafori, E. Fosler, N. Morgan, Towards robustness to fast speech in ASR. In Proceedings ICASSP, vol. 1 (1996), pp. 335\u2013338"},{"key":"828_CR20","unstructured":"N. Morgan, E. Fosler, N. Mirghafori, Speech recognition using on-line estimation of speaking rate. In Proceedings EUROSPEECH (1997), pp. 2079\u20132082"},{"key":"828_CR21","doi-asserted-by":"crossref","unstructured":"S.H. ParthasarathiK., B. Hoffmeister, S. Matsoukas, A. Mandal, N. Strom, S. Garimella, fMLLR based feature-space speaker adaptation of DNN acoustic models. In INTERSPEECH (2015)","DOI":"10.21437\/Interspeech.2015-720"},{"issue":"6","key":"828_CR22","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1109\/TSA.2003.818026","volume":"11","author":"A Potaminaos","year":"2003","unstructured":"A. Potaminaos, S. Narayanan, Robust recognition of children speech. IEEE Trans. Speech Audio Process. 11(6), 603\u2013616 (2003)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"2","key":"828_CR23","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1016\/j.csl.2010.06.003","volume":"25","author":"D Povey","year":"2011","unstructured":"D. Povey, L. Burget, M. Agarwal, P. Akyazi, F. Kai, A. Ghoshal, O. Glembek, N. Goel, M. Karafi\u00e1t, A. Rastrow, R.C. Rose, P. Schwarz, S. Thomas, The subspace Gaussian mixture model\u2014a structured model for speech recognition. Comput. Speech Lang. 25(2), 404\u2013439 (2011)","journal-title":"Comput. Speech Lang."},{"key":"828_CR24","unstructured":"D. Povey, A. Ghoshal, G. Boulianne, L. Burget, O. Glembek, N. Goel, M. Hannemann, P. Motlicek, Y. Qian, P. Schwarz, J. Silovsky, G. Stemmer, K. Vesely, The Kaldi speech recognition toolkit. In Proceedings ASRU (2011)"},{"key":"828_CR25","volume-title":"Fundamentals of Speech Recognition","author":"L Rabiner","year":"1993","unstructured":"L. Rabiner, B.H. Juang, Fundamentals of Speech Recognition (Prentice-Hall Inc, Upper Saddle River, NJ, 1993)"},{"key":"828_CR26","unstructured":"T. Robinson, J. Fransen, D. Pye, J. Foote, S. Renals, WSJCAM0: a British English speech corpus for large vocabulary continuous speech recognition. In Proceedings ICASSP, vol. 1 (1995), pp. 81\u201384"},{"key":"828_CR27","doi-asserted-by":"crossref","unstructured":"M. Russell, S. D\u2019Arcy, Challenges for computer recognition of children\u2019s speech. In Proceedings Speech and Language Technologies in Education (SLaTE) (2007)","DOI":"10.21437\/SLaTE.2007-26"},{"key":"828_CR28","unstructured":"J. Schalkwyk, D. Beeferman, F. Beaufays, B. Byrne, C. Chelba, M. Cohen, M. Kamvar, B. Strope, Your word is my command: Google search by voice: a case study. In Advances in Speech Recognition: Mobile Environments, Call Centers and Clinics, chap. 4 (2010), pp. 61\u201390"},{"key":"828_CR29","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1017\/S135132491600005X","volume":"23","author":"R Serizel","year":"2016","unstructured":"R. Serizel, D. Giuliani, Deep-neural network approaches for speech recognition with heterogeneous groups of speakers including children. Nat. Lang. Eng. 23, 325\u2013350 (2016)","journal-title":"Nat. Lang. Eng."},{"key":"828_CR30","unstructured":"S. Shahnawazuddin, K.T. Deepak, G. Pradhan, R. Sinha, Enhancing noise and pitch robustness of children\u2019s ASR. In Proceedings ICASSP (2017), pp. 5225\u20135229"},{"key":"828_CR31","unstructured":"S. Shahnawazuddin, A. Dey, R. Sinha, Pitch-adaptive front-end features for robust children\u2019s ASR. In Proceedings INTERSPEECH (2016), pp. 3459\u20133463"},{"key":"828_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2016.10.004","volume":"43","author":"S Shahnawazuddin","year":"2017","unstructured":"S. Shahnawazuddin, R. Sinha, Sparse coding over redundant dictionaries for fast adaptation of speech recognition system. Comput. Speech Lang. 43, 1\u201317 (2017)","journal-title":"Comput. Speech Lang."},{"issue":"8","key":"828_CR33","doi-asserted-by":"publisher","first-page":"1128","DOI":"10.1109\/LSP.2017.2705085","volume":"24","author":"S Shahnawazuddin","year":"2017","unstructured":"S. Shahnawazuddin, R. Sinha, G. Pradhan, Pitch-normalized acoustic features for robust children\u2019s speech recognition. IEEE Signal Process. Lett. 24(8), 1128\u20131132 (2017)","journal-title":"IEEE Signal Process. Lett."},{"key":"828_CR34","unstructured":"X. Shao, B. Milner, Pitch prediction from MFCC vectors for speech reconstruction. In Proceedings ICASSP (2004), pp. 97\u2013100"},{"key":"828_CR35","unstructured":"M.A. Siegler, R.M. Stern, On the effects of speech rate in large vocabulary speech recognition systems. In Proceedings ICASSP, vol. 1 (1995), pp. 612\u2013615"},{"key":"828_CR36","unstructured":"H. Singer, S. Sagayama, Pitch dependent phone modelling for HMM based speech recognition. In Proceedings ICASSP (1992), pp. 273\u2013276"},{"key":"828_CR37","unstructured":"G. Stemmer, C. Hacker, S. Steidl, E. N\u00f6th, Acoustic normalization of childrens speech. In Proceedings INTERSPEECH (2003), pp. 1313\u20131316"},{"key":"828_CR38","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1037\/0096-1523.7.5.1074","volume":"7","author":"Q Summerfield","year":"1981","unstructured":"Q. Summerfield, Articulatory rate and perceptual constancy in phonetic perception. J. Exp. Psychol. Hum. Perform. Percept. 7, 208\u2013215 (1981)","journal-title":"J. Exp. Psychol. Hum. Perform. Percept."},{"issue":"5","key":"828_CR39","doi-asserted-by":"publisher","first-page":"798","DOI":"10.1109\/JSTSP.2010.2057192","volume":"4","author":"ZH Tan","year":"2010","unstructured":"Z.H. Tan, B. Lindberg, Low-complexity variable frame rate analysis for speech recognition and voice activity detection. IEEE J. Sel. Top. Signal Process. 4(5), 798\u2013807 (2010)","journal-title":"IEEE J. Sel. Top. Signal Process."},{"issue":"1","key":"828_CR40","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1121\/1.3662059","volume":"131","author":"DL Valente","year":"2012","unstructured":"D.L. Valente, H.M. Plevinsky, J.M. Franco, E.C. Heinrichs-Graham, D. Lewis, Experimental investigation of the effects of the acoustical conditions in a simulated classroom on speech recognition and learning in children. J. Acoust. Soc. Am. 131(1), 232\u2013246 (2012)","journal-title":"J. Acoust. Soc. Am."},{"issue":"4","key":"828_CR41","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1016\/S0167-6393(00)00013-3","volume":"32","author":"S Whiteside","year":"2000","unstructured":"S. Whiteside, C. Hodgson, Speech patterns of children and adults elicited via a picture-naming task: an acoustic study. Speech Commun. 32(4), 267\u2013285 (2000)","journal-title":"Speech Commun."},{"key":"828_CR42","unstructured":"J. Wilpon, C. Jacobsen, A study of speech recognition for children and the elderly. In Proceedings ICASSP, vol. 1 (1996), pp. 349\u2013352"},{"key":"828_CR43","unstructured":"P.C. Woodland, Speaker adaptation for continuos density HMMs: a review. In Proceedings ISCA ITRW on Adaptation Methods for Speech Recognition (2001), pp. 11\u201319"},{"key":"828_CR44","unstructured":"H. You, Q. Zhu, A. Alwan, Entropy-based variable frame rate analysis of speech signals and its application to ASR. In Proceedings ICASSP, vol. 1 (2004), pp. 549\u2013522"},{"key":"828_CR45","unstructured":"X. Zhang, J. Trmal, D. Povey, S. Khudanpur, Improving deep neural network acoustic models using generalized maxout networks. In Proceedings ICASSP (2014), pp. 215\u2013219"},{"issue":"5","key":"828_CR46","doi-asserted-by":"publisher","first-page":"1645","DOI":"10.1109\/TASL.2007.899236","volume":"15","author":"X Zhu","year":"2007","unstructured":"X. Zhu, G.T. Beauregard, L.L. Wyse, Real-time signal estimation from modified short-time Fourier transform magnitude spectra. IEEE Trans. Audio Speech Lang. Process. 15(5), 1645\u20131653 (2007)","journal-title":"IEEE Trans. Audio Speech Lang. Process."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00034-018-0828-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-018-0828-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-018-0828-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,6]],"date-time":"2024-07-06T14:48:00Z","timestamp":1720277280000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00034-018-0828-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,5,5]]},"references-count":46,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["828"],"URL":"https:\/\/doi.org\/10.1007\/s00034-018-0828-2","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2018,5,5]]},"assertion":[{"value":"21 July 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 April 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 April 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 May 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}