{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T08:30:16Z","timestamp":1769157016893,"version":"3.49.0"},"reference-count":73,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,9,13]],"date-time":"2021-09-13T00:00:00Z","timestamp":1631491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,9,13]],"date-time":"2021-09-13T00:00:00Z","timestamp":1631491200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Ambient Intell Human Comput"],"published-print":{"date-parts":[[2022,5]]},"DOI":"10.1007\/s12652-021-03468-3","type":"journal-article","created":{"date-parts":[[2021,9,13]],"date-time":"2021-09-13T07:04:31Z","timestamp":1631516671000},"page":"2705-2721","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["In domain training data augmentation on noise robust Punjabi Children speech recognition"],"prefix":"10.1007","volume":"13","author":[{"given":"Virender","family":"Kadyan","sequence":"first","affiliation":[]},{"given":"Puneet","family":"Bawa","sequence":"additional","affiliation":[]},{"given":"Taniya","family":"Hasija","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,13]]},"reference":[{"key":"3468_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-10674-4","volume-title":"Feature selection and enhanced krill herd algorithm for text document clustering","author":"LMQ Abualigah","year":"2019","unstructured":"Abualigah LMQ (2019) Feature selection and enhanced krill herd algorithm for text document clustering. Springer, Berlin"},{"key":"3468_CR2","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-020-04839-1","author":"L Abualigah","year":"2020","unstructured":"Abualigah L (2020) Multi-verse optimizer algorithm: a comprehensive survey of its results variants and applications. Neural Comput Appl. https:\/\/doi.org\/10.1007\/s00521-020-04839-1","journal-title":"Neural Comput Appl"},{"key":"3468_CR3","doi-asserted-by":"publisher","first-page":"4773","DOI":"10.1007\/s11227-017-2046-2","volume":"73","author":"LM Abualigah","year":"2017","unstructured":"Abualigah LM, Khader AT (2017) Unsupervised text feature selection technique based on hybrid particle swarm optimization algorithm with genetic operators for the text clustering. J Supercomput 73:4773\u20134795. https:\/\/doi.org\/10.1007\/s11227-017-2046-2","journal-title":"J Supercomput"},{"key":"3468_CR4","doi-asserted-by":"publisher","unstructured":"Al-Ali AKH, Dean D, Senadji B, Baktashmotlagh M, Chandran V (2017) Speaker verification with multi-run ICA based speech enhancement. In: 2017 11th International Conference on Signal Processing and Communication Systems (ICSPCS), (pp 1\u20137). IEEE. https:\/\/doi.org\/10.1109\/icspcs.2017.8270505","DOI":"10.1109\/icspcs.2017.8270505"},{"key":"3468_CR5","doi-asserted-by":"publisher","first-page":"143","DOI":"10.3390\/app6050143","volume":"6","author":"F Al\u00edas","year":"2016","unstructured":"Al\u00edas F, Socor\u00f3 JC, Sevillano X (2016) A review of physical and perceptual feature extraction techniques for speech, music and environmental sounds. Appl Sci 6:143. https:\/\/doi.org\/10.3390\/app6050143","journal-title":"Appl Sci"},{"key":"3468_CR6","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.specom.2013.07.008","volume":"56","author":"L Besacier","year":"2014","unstructured":"Besacier L, Barnard E, Karpov A, Schultz T (2014) Automatic speech recognition for under-resourced languages: a survey. Speech Commun 56:85\u2013100. https:\/\/doi.org\/10.1016\/j.specom.2013.07.008","journal-title":"Speech Commun"},{"issue":"3","key":"3468_CR7","doi-asserted-by":"publisher","first-page":"900","DOI":"10.1109\/tasl.2011.2168209","volume":"20","author":"YHB Chiu","year":"2011","unstructured":"Chiu YHB, Raj B, Stern RM (2011) Learning-based auditory encoding for robust speech recognition. IEEE Trans Audio Speech Lang Process 20(3):900\u2013914. https:\/\/doi.org\/10.1109\/tasl.2011.2168209","journal-title":"IEEE Trans Audio Speech Lang Process"},{"issue":"2\u20133","key":"3468_CR8","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1016\/0165-5876(94)90024-8","volume":"28","author":"CC Crandell","year":"1993","unstructured":"Crandell CC (1993) Speech recognition in noise by children with minimal degrees of sensorineural hearing loss. Int J Pediatr Otorhinolaryngol 28(2\u20133):262. https:\/\/doi.org\/10.1016\/0165-5876(94)90024-8","journal-title":"Int J Pediatr Otorhinolaryngol"},{"key":"3468_CR9","doi-asserted-by":"publisher","unstructured":"Das S, Nix D, Picheny M (1998) Improvements in children's speech recognition performance. In: Proceedings of the 1998 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP'98 (Cat. No. 98CH36181) (Vol 1, pp 433\u2013436). IEEE. https:\/\/doi.org\/10.1109\/ICASSP.1998.674460","DOI":"10.1109\/ICASSP.1998.674460"},{"key":"3468_CR10","doi-asserted-by":"publisher","unstructured":"Deka A, Deka MK (2018) Spoken dialog system in bodo language for agro services. In: Advances in Electronics, Communication and Computing. Springer, Singapore, pp 623\u2013631 https:\/\/doi.org\/10.1007\/978-981-10-4765-7_65","DOI":"10.1007\/978-981-10-4765-7_65"},{"key":"3468_CR11","doi-asserted-by":"crossref","unstructured":"Deng L, Acero A, Plumpe M, Huang X (2000) Large-vocabulary speech recognition under adverse acoustic environments. In: Sixth International Conference on Spoken Language Processing (ICSLP), pp 806\u2013809","DOI":"10.21437\/ICSLP.2000-657"},{"key":"3468_CR12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2125","author":"A Dey","year":"2018","unstructured":"Dey A, Sarma BD, Lalhminghlui W, Ngente L, Gogoi P, Sarmah P et al (2018) Robust mizo continuous speech recognition. Interspeech. https:\/\/doi.org\/10.21437\/Interspeech.2018-2125","journal-title":"Interspeech"},{"key":"3468_CR13","first-page":"359","volume":"9","author":"M Dua","year":"2012","unstructured":"Dua M, Aggarwal RK, Kadyan V, Dua S (2012a) Punjabi automatic speech recognition using HTK. Int J Comput Sci Issues (IJCSI) 9:359","journal-title":"Int J Comput Sci Issues (IJCSI)"},{"key":"3468_CR14","doi-asserted-by":"publisher","first-page":"2301","DOI":"10.1007\/s12652-018-0828-x","volume":"10","author":"M Dua","year":"2019","unstructured":"Dua M, Aggarwal RK, Biswas M (2019) GFCC based discriminatively trained noise robust continuous ASR system for Hindi language. J Ambient Intell Humaniz Comput 10:2301\u20132314. https:\/\/doi.org\/10.1007\/s12652-018-0828-x","journal-title":"J Ambient Intell Humaniz Comput"},{"key":"3468_CR15","doi-asserted-by":"publisher","unstructured":"Dua M, Aggarwal RK, Kadyan V, Dua S (2012b) Punjabi speech to text system for connected words. https:\/\/doi.org\/10.1049\/cp.2012.2528","DOI":"10.1049\/cp.2012.2528"},{"key":"3468_CR16","first-page":"22","volume":"1","author":"G Fant","year":"1966","unstructured":"Fant G (1966) A note on vocal tract size factors and non-uniform F-pattern scalings. Speech Transmiss Lab Quart Prog Status Rep 1:22\u201330","journal-title":"Speech Transmiss Lab Quart Prog Status Rep"},{"key":"3468_CR17","doi-asserted-by":"publisher","unstructured":"Gaikwad S, Gawali B, Basil M (2019) SCEHMA: speech corpus of english, hindi, marathi and arabic language for advance speech recognition development. In: In International Conference on Applied Computing to Support Industry: Innovation and Technology. Springer, Cham, pp 123\u2013135 https:\/\/doi.org\/10.1007\/978-3-030-38752-5_10","DOI":"10.1007\/978-3-030-38752-5_10"},{"key":"3468_CR18","doi-asserted-by":"publisher","unstructured":"Ghahremani P, Baba Ali B, Povey D, Riedhammer K, Trmal J, Khudanpur S (2014) A pitch extraction algorithm tuned for automatic speech recognition. In Acoustics, Speech and Signal Processing (ICASSP). In: 2014 IEEE International Conference on (pp 2494\u20132498). IEEE. https:\/\/doi.org\/10.1109\/icassp.2014.6854049","DOI":"10.1109\/icassp.2014.6854049"},{"key":"3468_CR19","doi-asserted-by":"crossref","unstructured":"Ghai S, Sinha R (2009) Exploring the role of spectral smoothing in context of children's speech recognition. In: Tenth Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2009-209"},{"key":"3468_CR20","doi-asserted-by":"publisher","unstructured":"Giurgiu M, Kabir A (2011) Comparison of vocal tract length normalization technique applied for clean and noisy speech. In: 2011 34th International Conference on Telecommunications and Signal Processing (TSP) (pp 351\u2013354). IEEE. https:\/\/doi.org\/10.1109\/tsp.2011.6043710","DOI":"10.1109\/tsp.2011.6043710"},{"issue":"3","key":"3468_CR21","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1016\/0167-6393(94)00059-J","volume":"16","author":"Y Gong","year":"1995","unstructured":"Gong Y (1995) Speech recognition in noisy environments: a survey. Speech Commun 16(3):261\u2013291. https:\/\/doi.org\/10.1016\/0167-6393(94)00059-J","journal-title":"Speech Commun"},{"key":"3468_CR22","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s10772-018-9497-6","volume":"21","author":"J Guglani","year":"2018","unstructured":"Guglani J, Mishra AN (2018) Continuous Punjabi speech recognition model based on Kaldi ASR toolkit. Int J Speech Technol 21:211\u2013216. https:\/\/doi.org\/10.1007\/s10772-018-9497-6","journal-title":"Int J Speech Technol"},{"key":"3468_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107386","volume":"167","author":"J Guglani","year":"2020","unstructured":"Guglani J, Mishra AN (2020) Automatic speech recognition system with pitch dependent features for Punjabi language on KALDI toolkit. Appl Acoust 167:107386. https:\/\/doi.org\/10.1016\/j.apacoust.2020.107386","journal-title":"Appl Acoust"},{"key":"3468_CR24","unstructured":"Gupta N, Mishra AN, Sharma U (2015) Speech Recognition using Hybrid of GFCC and PLP. J Basic Appl Eng Res: 1896\u20131899"},{"key":"3468_CR25","doi-asserted-by":"crossref","unstructured":"Gustafson J, Sj\u00f6lander K (2002) Voice transformations for improving children's speech recognition in a publicly available dialogue system. In: 7th International Conference on Spoken Language Processing (ICSLP2002-INTERSPEECH 2002), Denver, Colorado, USA, September 16\u201320, 2002 (pp 297\u2013300). International Speech Communication Association","DOI":"10.21437\/ICSLP.2002-139"},{"key":"3468_CR26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1386","author":"W Hartmann","year":"2016","unstructured":"Hartmann W, Ng T, Hsiao R, Tsakalidis S, Schwartz RM (2016) Two-stage data augmentation for low-resourced speech recognition. Interspeech. https:\/\/doi.org\/10.21437\/Interspeech.2016-1386","journal-title":"Interspeech"},{"key":"3468_CR27","unstructured":"Hawley ME, Kryter KD (1957) Effects of noise on speech. In: Harris CM (ed) Handbook of noise control, pp 1-1\u20131-26"},{"key":"3468_CR28","doi-asserted-by":"publisher","unstructured":"Hermansky H, Morgan N, Hirsch HG (1993) Recognition of speech in additive and convolutional noise based on RASTA spectral processing. In: 1993 IEEE International Conference on Acoustics, Speech, and Signal Processing (pp 83\u201386). IEEE. https:\/\/doi.org\/10.1109\/icassp.1993.319236","DOI":"10.1109\/icassp.1993.319236"},{"key":"3468_CR29","doi-asserted-by":"crossref","unstructured":"H\u00f6nig F, Stemmer G, Hacker C, Brugnara F (2005) Revising perceptual linear prediction (PLP). In: Ninth European Conference on Speech Communication and Technology","DOI":"10.21437\/Interspeech.2005-138"},{"key":"3468_CR30","unstructured":"Huang X, Acero A, Hon HW, Reddy R (2001) Spoken language processing: a guide to theory, algorithm, and system development. Prentice Hall PTR"},{"issue":"1","key":"3468_CR31","doi-asserted-by":"publisher","first-page":"510","DOI":"10.1121\/1.405631","volume":"93","author":"JC Junqua","year":"1993","unstructured":"Junqua JC (1993) The Lombard reflex and its role on human listeners and automatic speech recognizers. J Acoust Soc Am 93(1):510\u2013524. https:\/\/doi.org\/10.1121\/1.405631","journal-title":"J Acoust Soc Am"},{"key":"3468_CR32","volume-title":"Acoustic features optimization for punjabi automatic speech recognition system","author":"V Kadyan","year":"2018","unstructured":"Kadyan V (2018) Acoustic features optimization for punjabi automatic speech recognition system. Chitkara University, Punjab"},{"issue":"4","key":"3468_CR33","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1007\/s10772-017-9446-9","volume":"20","author":"V Kadyan","year":"2017","unstructured":"Kadyan V, Mantri A, Aggarwal RK (2017) A heterogeneous speech feature vectors generation approach with hybrid hmm classifiers. Int J Speech Technol 20(4):761\u2013769. https:\/\/doi.org\/10.1007\/s10772-017-9446-9","journal-title":"Int J Speech Technol"},{"issue":"5","key":"3468_CR34","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1080\/03772063.2017.1369370","volume":"64","author":"V Kadyan","year":"2018","unstructured":"Kadyan V, Mantri A, Aggarwal RK (2018) Refinement of HMM model parameters for punjabi automatic speech recognition (PASR) system. IETE J Res 64(5):673\u2013688. https:\/\/doi.org\/10.1080\/03772063.2017.1369370","journal-title":"IETE J Res"},{"key":"3468_CR35","doi-asserted-by":"publisher","DOI":"10.1007\/s11831-020-09414-4","author":"J Kaur","year":"2020","unstructured":"Kaur J, Singh A, Kadyan V (2020) Automatic speech recognition system for tonal languages: state-of-the-art survey. Arch Comput Methods Eng. https:\/\/doi.org\/10.1007\/s11831-020-09414-4","journal-title":"Arch Comput Methods Eng"},{"key":"3468_CR36","doi-asserted-by":"publisher","unstructured":"Kaur H, Kadyan V (2020) Feature space discriminatively trained Punjabi children speech recognition system Using Kaldi Toolkit. Available at SSRN 3565906. https:\/\/doi.org\/10.2139\/ssrn.3565906","DOI":"10.2139\/ssrn.3565906"},{"issue":"1","key":"3468_CR37","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1080\/17549507.2019.1568571","volume":"22","author":"HC Kopera","year":"2020","unstructured":"Kopera HC, Grigos MI (2020) Lexical stress in childhood apraxia of speech: acoustic and kinematic findings. Int J Speech Lang Pathol 22(1):12\u201323. https:\/\/doi.org\/10.1080\/17549507.2019.1568571","journal-title":"Int J Speech Lang Pathol"},{"issue":"5.6","key":"3468_CR38","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1147\/rd.485.0703","volume":"48","author":"M Kumar","year":"2004","unstructured":"Kumar M, Rajput N, Verma A (2004) A large-vocabulary continuous speech recognition system for Hindi. IBM J Res Dev 48(5.6):703\u2013715. https:\/\/doi.org\/10.1147\/rd.485.0703","journal-title":"IBM J Res Dev"},{"key":"3468_CR39","doi-asserted-by":"publisher","unstructured":"Lee L, Rose RC (1996) Speaker normalization using efficient frequency warping procedures. In: 1996 IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings (Vol 1, pp 353\u2013356). IEEE. https:\/\/doi.org\/10.1109\/icassp.1996.541105","DOI":"10.1109\/icassp.1996.541105"},{"key":"3468_CR40","doi-asserted-by":"publisher","unstructured":"Lippmann R, Martin E, Paul D (1987) Multi-style training for robust isolated-word speech recognition. In: ICASSP'87. IEEE International Conference on Acoustics, Speech, and Signal Processing (Vol 12, pp 705\u2013708). IEEE. https:\/\/doi.org\/10.1109\/icassp.1987.1169544","DOI":"10.1109\/icassp.1987.1169544"},{"issue":"1","key":"3468_CR41","first-page":"38","volume":"79","author":"SA Majeed","year":"2015","unstructured":"Majeed SA, Husain H, Samad SA, Idbeaa TF (2015) Mel frequency cepstral coefficients (mfcc) feature extraction enhancement in the application of speech recognition: a comparison study. J Theor Appl Inf Technol 79(1):38\u201356","journal-title":"J Theor Appl Inf Technol"},{"key":"3468_CR42","doi-asserted-by":"publisher","unstructured":"Marsal PP, Font SP, Hagen A, Bourlard H, Nadeu C (2002) Comparison and combination of RASTA-PLP and FF features in a hybrid HMM\/MLP speech recognition system. In: Seventh International Conference on Spoken Language Processing. https:\/\/doi.org\/10.1109\/TSA.2004.834466","DOI":"10.1109\/TSA.2004.834466"},{"key":"3468_CR43","doi-asserted-by":"crossref","unstructured":"Martin F, Shikano K, Minami Y (1993) Recognition of noisy speech by composition of hidden Markov models. In: Third European Conference on Speech Communication and Technology, pp 1031\u20131034","DOI":"10.21437\/Eurospeech.1993-247"},{"issue":"3","key":"3468_CR44","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1121\/10.0000824","volume":"147","author":"PH Milenkovic","year":"2020","unstructured":"Milenkovic PH, Wagner M, Kent RD, Story BH, Vorperian HK (2020) Effects of sampling rate and type of anti-aliasing filter on linear-predictive estimates of formant frequencies in men, women, and children. J Acous Soc Am 147(3):221\u2013227. https:\/\/doi.org\/10.1121\/10.0000824","journal-title":"J Acous Soc Am"},{"key":"3468_CR45","doi-asserted-by":"publisher","unstructured":"Milne B (2002) A comparison of front-end configurations for robust speech recognition. In: 2002 IEEE International Conference on Acoustics, Speech, and Signal Processing (Vol 1, pp I-797). IEEE. https:\/\/doi.org\/10.1109\/icassp.2002.5743838","DOI":"10.1109\/icassp.2002.5743838"},{"key":"3468_CR46","doi-asserted-by":"publisher","first-page":"233121652094698","DOI":"10.1177\/2331216520946983","volume":"24","author":"SM Misurelli","year":"2020","unstructured":"Misurelli SM, Goupell MJ, Burg AE, Jocewicz R, Kan A, Litovsky RY (2020) Auditory attention and spatial unmasking in children with cochlear implants. Trends Hear 24:2331216520946983. https:\/\/doi.org\/10.1177\/2331216520946983","journal-title":"Trends Hear"},{"key":"3468_CR47","doi-asserted-by":"publisher","unstructured":"Mitra V, Franco H, Graciarena M, Mandal A (2012) Normalized amplitude modulation features for large vocabulary noise-robust speech recognition. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp 4117\u20134120). IEEE. https:\/\/doi.org\/10.1109\/icassp.2012.6288824","DOI":"10.1109\/icassp.2012.6288824"},{"key":"3468_CR48","doi-asserted-by":"crossref","unstructured":"Morris AC, Maier V, Green P (2004) From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition. In: Eighth International Conference on Spoken Language Processing","DOI":"10.21437\/Interspeech.2004-668"},{"key":"3468_CR49","doi-asserted-by":"publisher","unstructured":"Mrvaljevic N, Sun Y (2009) Comparison between speaker dependent mode and speaker independent mode for voice recognition. In: 2009 IEEE 35th Annual Northeast Bioengineering Conference, pp 1\u20132. IEEEhttps:\/\/doi.org\/10.1109\/nebc.2009.4967804","DOI":"10.1109\/nebc.2009.4967804"},{"issue":"3","key":"3468_CR50","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1097\/AUD.0b013e3181d3d514","volume":"31","author":"AC Neuman","year":"2010","unstructured":"Neuman AC, Wroblewski M, Hajicek J, Rubinstein A (2010) Combined effects of noise and reverberation on speech recognition performance of normal-hearing children and adults. Ear Hear 31(3):336\u2013344. https:\/\/doi.org\/10.1097\/AUD.0b013e3181d3d514","journal-title":"Ear Hear"},{"issue":"4","key":"3468_CR51","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1080\/02564602.2015.1010611","volume":"32","author":"J Padmanabhan","year":"2015","unstructured":"Padmanabhan J, Johnson Premkumar MJ (2015) Machine learning in automatic speech recognition: a survey. IETE Tech Rev 32(4):240\u2013251. https:\/\/doi.org\/10.1080\/02564602.2015.1010611","journal-title":"IETE Tech Rev"},{"key":"3468_CR52","doi-asserted-by":"crossref","unstructured":"Paliwal KK (1995) Interpolation properties of linear prediction parametric representations. In: Fourth European Conference on Speech Communication and Technology","DOI":"10.21437\/Eurospeech.1995-266"},{"key":"3468_CR53","unstructured":"Pelecanos J, Sridharan S (2001) Feature warping for robust speaker verification. In: Proceedings of 2001 a speaker odyssey: the speaker recognition workshop. European Speech Communication Association, pp 213\u2013218. Crete, Greece"},{"key":"3468_CR54","unstructured":"Povey D, Ghoshal A, Boulianne G, Burget L, Glembek O, Goel N, et al. (2011) The Kaldi speech recognition toolkit. In: IEEE 2011 workshop on automatic speech recognition and understanding (No. CONF). IEEE Signal Processing Society"},{"issue":"5","key":"3468_CR55","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1109\/tassp.1978.1163137","volume":"26","author":"M Sambur","year":"1978","unstructured":"Sambur M (1978) Adaptive noise canceling for speech signals. IEEE Trans Acoust Speech Signal Process 26(5):419\u2013423. https:\/\/doi.org\/10.1109\/tassp.1978.1163137","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"3468_CR56","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1016\/j.patrec.2019.12.019","volume":"131","author":"S Shahnawazuddin","year":"2020","unstructured":"Shahnawazuddin S, Adiga N, Kathania HK, Sai BT (2020) Creating speaker independent ASR system through prosody modification based data augmentation. Pattern Recogn Lett 131:213\u2013218. https:\/\/doi.org\/10.1016\/j.patrec.2019.12.019","journal-title":"Pattern Recogn Lett"},{"key":"3468_CR57","doi-asserted-by":"publisher","unstructured":"Shahnawazuddin S, Deepak KT, Pradhan G, Sinha R (2017) Enhancing noise and pitch robustness of children's ASR. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp 5225\u20135229). IEEE. https:\/\/doi.org\/10.1109\/icassp.2017.7953153","DOI":"10.1109\/icassp.2017.7953153"},{"key":"3468_CR58","doi-asserted-by":"publisher","unstructured":"Shao Y, Jin Z, Wang D, Srinivasan S (2009) An auditory-based feature for robust speech recognition. In: 2009 IEEE International Conference on Acoustics, Speech and Signal Processing (pp 4625\u20134628). IEEE. https:\/\/doi.org\/10.1186\/1687-4722-2014-21","DOI":"10.1186\/1687-4722-2014-21"},{"key":"3468_CR59","doi-asserted-by":"publisher","unstructured":"Shrawankar U, Thakare V (2010) Feature extraction for a speech recognition system in noisy environment: a study. In: 2010 Second International Conference on Computer Engineering and Applications. https:\/\/doi.org\/10.1109\/iccea.2010.76","DOI":"10.1109\/iccea.2010.76"},{"key":"3468_CR60","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-019-09775-8","author":"A Singh","year":"2019","unstructured":"Singh A, Kadyan V, Kumar M, Bassan N (2019) ASRoIL: a comprehensive survey for automatic speech recognition of Indian languages. Artif Intell Rev. https:\/\/doi.org\/10.1007\/s10462-019-09775-8","journal-title":"Artif Intell Rev"},{"key":"3468_CR61","doi-asserted-by":"crossref","unstructured":"Sun S, Yeh CF, Ostendorf M, Hwang MY, Xie L (2018) Training augmentation with adversarial examples for robust speech recognition. arXiv preprint https:\/\/arxiv.org\/abs\/1806.02782","DOI":"10.21437\/Interspeech.2018-1247"},{"key":"3468_CR62","doi-asserted-by":"crossref","unstructured":"Sung YH (2010) Hidden conditional random fields for speech recognition. Doctoral dissertation, Stanford University","DOI":"10.1109\/ASRU.2009.5373329"},{"key":"3468_CR63","doi-asserted-by":"crossref","unstructured":"Tuerk C, Robinson T (1993) A new frequency shift function for reducing inter-speaker variance. In: Third European Conference on Speech Communication and Technology","DOI":"10.21437\/Eurospeech.1993-101"},{"issue":"3","key":"3468_CR64","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1016\/0167-6393(93)90095-3","volume":"12","author":"A Varga","year":"1993","unstructured":"Varga A, Steeneken HJM (1993) Assessment for automatic speech recognition: II. NOISEX-92: a database and an experiment to study the effect of additive noise on speech recognition systems. Speech Commun 12(3):247\u2013251. https:\/\/doi.org\/10.1016\/0167-6393(93)90095-3","journal-title":"Speech Commun"},{"key":"3468_CR65","doi-asserted-by":"publisher","first-page":"2421","DOI":"10.3389\/fpsyg.2019.02421","volume":"10","author":"E Walker","year":"2019","unstructured":"Walker E, Sapp C, Oleson J, McCreery RW (2019) Longitudinal speech recognition in noise in children: effects of hearing status and vocabulary. Front Psychol 10:2421. https:\/\/doi.org\/10.3389\/fpsyg.2019.02421","journal-title":"Front Psychol"},{"issue":"2","key":"3468_CR66","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1109\/mcse.2011.37","volume":"13","author":"SVD Walt","year":"2011","unstructured":"Walt SVD, Colbert SC, Varoquaux G (2011) The NumPy array: a structure for efficient numerical computation. Comput Sci Eng 13(2):22\u201330. https:\/\/doi.org\/10.1109\/mcse.2011.37","journal-title":"Comput Sci Eng"},{"issue":"2","key":"3468_CR67","doi-asserted-by":"publisher","first-page":"275","DOI":"10.3758\/BF03211895","volume":"59","author":"RM Warren","year":"1997","unstructured":"Warren RM, Hainsworth KR, Brubaker BS, Bashford JA, Healy EW (1997) Spectral restoration of speech: intelligibility is increased by inserting noise in spectral gaps. Percept Psychophys 59(2):275\u2013283. https:\/\/doi.org\/10.3758\/BF03211895","journal-title":"Percept Psychophys"},{"issue":"2","key":"3468_CR68","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1016\/s1007-0214(05)70048-1","volume":"10","author":"Z Wu","year":"2005","unstructured":"Wu Z, Cao Z (2005) Improved MFCC-based feature for robust speaker identification. Tsinghua Sci Technol 10(2):158\u2013161. https:\/\/doi.org\/10.1016\/s1007-0214(05)70048-1","journal-title":"Tsinghua Sci Technol"},{"key":"3468_CR69","doi-asserted-by":"publisher","DOI":"10.1155\/S111086570440225X","volume":"11","author":"B Wu","year":"2004","unstructured":"Wu B, Ren X, Liu C, Zhang Y (2004) A novel speech\/noise discrimination method for embedded ASR system. EURASIP J Adv Signal Process 11:951918. https:\/\/doi.org\/10.1155\/S111086570440225X","journal-title":"EURASIP J Adv Signal Process"},{"key":"3468_CR70","doi-asserted-by":"publisher","unstructured":"Xiang B, Chaudhari UV, Navratil J, Ramaswamy GN, Gopinath RA (2002) Short-time Gaussianization for robust speaker verification. In: IEEE International Conference on Acoustics Speech and Signal Processing (Vol. 1, pp. I-681). IEEE. https:\/\/doi.org\/10.1109\/icassp.2002.5743809","DOI":"10.1109\/icassp.2002.5743809"},{"key":"3468_CR71","unstructured":"Xu D, Yapanel U, Gray S, Gilkerson J, Richards J, Hansen J (2008) Signal processing for young child speech language development. In: First Workshop on Child, Computer and Interaction."},{"issue":"1","key":"3468_CR72","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1016\/j.specom.2003.08.006","volume":"42","author":"Z Zhang","year":"2004","unstructured":"Zhang Z, Furui S (2004) Piecewise-linear transformation-based HMM adaptation for noisy speech. Speech Commun 42(1):43\u201358. https:\/\/doi.org\/10.1016\/j.specom.2003.08.006","journal-title":"Speech Commun"},{"key":"3468_CR73","doi-asserted-by":"crossref","unstructured":"Zhen B, Wu X, Liu Z, Chi H (2000) On the importance of components of the MFCC in speech and speaker recognition. In: Sixth International Conference on Spoken Language Processing (ICSLP)","DOI":"10.21437\/ICSLP.2000-313"}],"container-title":["Journal of Ambient Intelligence and Humanized Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-021-03468-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12652-021-03468-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12652-021-03468-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,9]],"date-time":"2023-01-09T01:52:50Z","timestamp":1673229170000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12652-021-03468-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,13]]},"references-count":73,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2022,5]]}},"alternative-id":["3468"],"URL":"https:\/\/doi.org\/10.1007\/s12652-021-03468-3","relation":{},"ISSN":["1868-5137","1868-5145"],"issn-type":[{"value":"1868-5137","type":"print"},{"value":"1868-5145","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,9,13]]},"assertion":[{"value":"22 March 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 August 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 September 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Authors have no conflict of interest in this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}