{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T21:23:39Z","timestamp":1778621019390,"version":"3.51.4"},"reference-count":152,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2023,8,15]],"date-time":"2023-08-15T00:00:00Z","timestamp":1692057600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,15]],"date-time":"2023-08-15T00:00:00Z","timestamp":1692057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Department of Science and Technology","award":["SP\/YO\/382\/2018(G)"],"award-info":[{"award-number":["SP\/YO\/382\/2018(G)"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-16438-y","type":"journal-article","created":{"date-parts":[[2023,8,15]],"date-time":"2023-08-15T02:01:43Z","timestamp":1692064903000},"page":"23367-23412","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":82,"title":["A comprehensive survey on automatic speech recognition using neural networks"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7763-9174","authenticated-orcid":false,"given":"Amandeep Singh","family":"Dhanjal","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Williamjeet","family":"Singh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,8,15]]},"reference":[{"issue":"10","key":"16438_CR1","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid O, Mohamed A-r, Jiang H, Deng L, Penn G, Yu D (2014) Convolutional Neural Networks for Speech Recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 22(10):1533\u20131545. https:\/\/doi.org\/10.1109\/TASLP.2014.2339736","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"6","key":"16438_CR2","doi-asserted-by":"publisher","first-page":"863","DOI":"10.1049\/iet-cds.2018.5225","volume":"13","author":"S Abed","year":"2019","unstructured":"Abed S, Mohd BJ, Al Shayeji MH (2019) Implementation of speech feature extraction for low-resource devices. IET Circuits, Devices and Systems 13(6):863\u2013872. https:\/\/doi.org\/10.1049\/iet-cds.2018.5225","journal-title":"IET Circuits, Devices and Systems"},{"key":"16438_CR3","doi-asserted-by":"publisher","unstructured":"Addarrazi I, Satori H, Satori K (2018) Building a first amazigh database for automatic audiovisual speech recognition system. ACM International Conference Proceeding Series 94\u201399. https:\/\/doi.org\/10.1145\/3289100.3289116","DOI":"10.1145\/3289100.3289116"},{"key":"16438_CR4","doi-asserted-by":"publisher","unstructured":"Jin Y, Wen B, Gu Z, Jiang X, Shu X, Zeng Z, Zhang Y, Guo Z, Chen Y, Zheng T, Yue Y, Zhang H, Ding H (2020) Deep-Learning-Enabled MXene-Based Artificial Throat: Toward Sound Detection and Speech Recognition. Advanced Materials Technologies 5(9): 2000262. https:\/\/doi.org\/10.1002\/admt.202000262","DOI":"10.1002\/admt.202000262"},{"key":"16438_CR5","doi-asserted-by":"publisher","unstructured":"Padmanabhan J, Johnson Premkumar MJ (2015) Machine Learning in Automatic Speech Recognition: A Survey. IETE Technical Review 32(4): 240\u2013251. https:\/\/doi.org\/10.1080\/02564602.2015.1010611","DOI":"10.1080\/02564602.2015.1010611"},{"key":"16438_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2019.101055","volume":"62","author":"T Aguiar de Lima","year":"2020","unstructured":"Aguiar de Lima T, Da Costa-Abreu M (2020) A survey on automatic speech recognition systems for Portuguese language and its variations. Computer Speech and Language 62:1\u201364. https:\/\/doi.org\/10.1016\/j.csl.2019.101055","journal-title":"Computer Speech and Language"},{"issue":"5","key":"16438_CR7","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1016\/j.neucom.2020.07.053","volume":"417","author":"M Alam","year":"2020","unstructured":"Alam M, Samad MD, Vidyaratne L, Glandon A, Iftekharuddin KM (2020) Survey on Deep Neural Networks in Speech and Vision Systems. Neurocomputing 417(5):302\u2013321. https:\/\/doi.org\/10.1016\/j.neucom.2020.07.053","journal-title":"Neurocomputing"},{"key":"16438_CR8","doi-asserted-by":"publisher","unstructured":"Dargan S, Kumar M, Ayyagari MR, Kumar G (2020) A Survey of Deep Learning and Its Applications: A New Paradigm to Machine Learning. Archives of Computational Methods in Engineering 27(4): 1071\u20131092. https:\/\/doi.org\/10.1007\/s11831-019-09344-w","DOI":"10.1007\/s11831-019-09344-w"},{"key":"16438_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cosrev.2020.100311","volume":"38","author":"FE Ayo","year":"2020","unstructured":"Ayo FE, Folorunso O, Ibharalu FT, Osinuga IA (2020) Machine learning techniques for hate speech classification of twitter data: State-of-The-Art, future challenges and research directions. Computer Science Review 38:1\u201334. https:\/\/doi.org\/10.1016\/j.cosrev.2020.100311","journal-title":"Computer Science Review"},{"issue":"February","key":"16438_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.specom.2020.04.002","volume":"122","author":"A Azarang","year":"2020","unstructured":"Azarang A, Kehtarnavaz N (2020) A review of multi-objective deep learning speech denoising methods. Speech Communication 122(February):1\u201310. https:\/\/doi.org\/10.1016\/j.specom.2020.04.002","journal-title":"Speech Communication"},{"key":"16438_CR11","doi-asserted-by":"publisher","unstructured":"Goh YH, Raveendran P, Goh YL (2015) Robust speech recognition system using bidirectional Kalman filter. IET Signal Processing 9(6): 491\u2013497. https:\/\/doi.org\/10.1049\/iet-spr.2014.0109","DOI":"10.1049\/iet-spr.2014.0109"},{"issue":"19","key":"16438_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/app10196936","volume":"10","author":"J-U Bang","year":"2020","unstructured":"Bang J-U, Yun S, Kim S-H, Choi M-Y, Lee M-K, Kim Y-J, Kim D-H, Park J, Lee Y-J, Kim S-H (2020) KsponSpeech: Korean Spontaneous Speech Corpus for Automatic Speech Recognition. Applied Sciences 10(19):1\u201317. https:\/\/doi.org\/10.3390\/app10196936","journal-title":"Applied Sciences"},{"key":"16438_CR13","doi-asserted-by":"publisher","unstructured":"Singh A, Kadyan V, Kumar M, Bassan N (2020) ASRoIL: a Comprehensive Survey for Automatic Speech Recognition of Indian Languages vol. 53 pp. 3673\u20133704. Springer ???. https:\/\/doi.org\/10.1007\/s10462-019-09775-8","DOI":"10.1007\/s10462-019-09775-8"},{"issue":"27\u201328","key":"16438_CR14","doi-asserted-by":"publisher","first-page":"19669","DOI":"10.1007\/s11042-020-08782-0","volume":"79","author":"A Becerra","year":"2020","unstructured":"Becerra A, de la Rosa JI, Gonz\u00e1lez E, Pedroza AD, Escalante NI, Santos E (2020) A comparative case study of neural network training by using frame-level cost functions for automatic speech recognition purposes in Spanish. Multimedia Tools and Applications 79(27\u201328):19669\u201319715. https:\/\/doi.org\/10.1007\/s11042-020-08782-0","journal-title":"Multimedia Tools and Applications"},{"issue":"10","key":"16438_CR15","doi-asserted-by":"publisher","first-page":"4213","DOI":"10.1007\/s12652-020-01703-x","volume":"11","author":"S Bhatt","year":"2020","unstructured":"Bhatt S, Dev A, Jain A (2020) Confusion analysis in phoneme based speech recognition in Hindi. Journal of Ambient Intelligence and Humanized Computing 11(10):4213\u20134238. https:\/\/doi.org\/10.1007\/s12652-020-01703-x","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"issue":"6","key":"16438_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1080\/02522667.2020.1809091","volume":"41","author":"S Bhatt","year":"2020","unstructured":"Bhatt S, Jain A, Dev A (2020) Syllable based Hindi speech recognition. J Inf Optim Sci 41(6):1\u201320. https:\/\/doi.org\/10.1080\/02522667.2020.1809091","journal-title":"J Inf Optim Sci"},{"issue":"August","key":"16438_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2020.103903","volume":"95","author":"MC Bingol","year":"2020","unstructured":"Bingol MC, Aydogmus O (2020) Performing predefined tasks using the human-robot interaction on speech recognition for an industrial robot. Eng Appl Artif Intell 95(August):103903. https:\/\/doi.org\/10.1016\/j.engappai.2020.103903","journal-title":"Eng Appl Artif Intell"},{"key":"16438_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2020.113402","volume":"153","author":"JJ Bird","year":"2020","unstructured":"Bird JJ, Wanner E, Ek\u00e1rt A, Faria DR (2020) Optimisation of phonetic aware speech recognition through multi-objective evolutionary algorithms. Expert Systems with Applications 153:113402. https:\/\/doi.org\/10.1016\/j.eswa.2020.113402","journal-title":"Expert Systems with Applications"},{"key":"16438_CR19","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/j.specom.2015.12.003","volume":"77","author":"M Cai","year":"2016","unstructured":"Cai M, Liu J (2016) Maxout neurons for deep convolutional and LSTM neural networks in speech recognition. Speech Communication 77:53\u201364. https:\/\/doi.org\/10.1016\/j.specom.2015.12.003","journal-title":"Speech Communication"},{"issue":"2","key":"16438_CR20","first-page":"65","volume":"18","author":"A Caranica","year":"2016","unstructured":"Caranica A, Cucu H, Buzo A, Burileanu C (2016) On the design of an automatic speech recognition system for Romanian language. Control Engineering and Applied Informatics 18(2):65\u201376","journal-title":"Control Engineering and Applied Informatics"},{"key":"16438_CR21","doi-asserted-by":"publisher","unstructured":"Keshet J (2018) Automatic speech recognition: A primer for speech-language pathology researchers. International Journal of Speech-Language Pathology 20(6): 599\u2013609. https:\/\/doi.org\/10.1080\/17549507.2018.1510033","DOI":"10.1080\/17549507.2018.1510033"},{"issue":"1","key":"16438_CR22","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1049\/cje.2018.11.008","volume":"28","author":"G Cheng","year":"2019","unstructured":"Cheng G, Li X, Yan Y (2019) Using Highway Connections to Enable Deep Small-footprint LSTM-RNNs for Speech Recognition. Chin J Electron 28(1):107\u2013112. https:\/\/doi.org\/10.1049\/cje.2018.11.008","journal-title":"Chin J Electron"},{"key":"16438_CR23","doi-asserted-by":"publisher","unstructured":"Kaur J, Singh A, Kadyan V (2020) Automatic Speech Recognition System for Tonal Languages: State-of-the-Art Survey. Archives of Computational Methods in Engineering (0123456789). https:\/\/doi.org\/10.1007\/s11831-020-09414-4","DOI":"10.1007\/s11831-020-09414-4"},{"key":"16438_CR24","doi-asserted-by":"publisher","unstructured":"Uma Maheswari S, Shahina A, Nayeemulla Khan A (2021) Understanding Lombard speech: a review of compensation techniques towards improving speech based recognition systems. Artif Intell Rev 54(4): 2495\u20132523. https:\/\/doi.org\/10.1007\/s10462-020-09907-5","DOI":"10.1007\/s10462-020-09907-5"},{"issue":"2","key":"16438_CR25","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1002\/cae.21884","volume":"26","author":"KA Darabkh","year":"2018","unstructured":"Darabkh KA, Haddad L, Sweidan SZ, Hawa M, Saifan R, Alnabelsi SH (2018) An efficient speech recognition system for arm-disabled students based on isolated words. Computer Applications in Engineering Education 26(2):285\u2013301. https:\/\/doi.org\/10.1002\/cae.21884","journal-title":"Computer Applications in Engineering Education"},{"issue":"4","key":"16438_CR26","doi-asserted-by":"publisher","first-page":"1071","DOI":"10.1007\/s11831-019-09344-w","volume":"27","author":"S Dargan","year":"2020","unstructured":"Dargan S, Kumar M, Ayyagari MR, Kumar G (2020) A Survey of Deep Learning and Its Applications: A New Paradigm to Machine Learning. Archives of Computational Methods in Engineering 27(4):1071\u20131092. https:\/\/doi.org\/10.1007\/s11831-019-09344-w","journal-title":"Archives of Computational Methods in Engineering"},{"key":"16438_CR27","doi-asserted-by":"publisher","unstructured":"Deepa P, Khilar R (2022) Speech technology in healthcare. Measurement: Sensors 24(August):100565. https:\/\/doi.org\/10.1016\/j.measen.2022.100565","DOI":"10.1016\/j.measen.2022.100565"},{"key":"16438_CR28","doi-asserted-by":"publisher","unstructured":"Muhammad AN, Aseere AM, Chiroma H, Shah H, Gital AY, Hashem IAT (2021) Deep Learning Application in Smart Cities: Recent Development, Taxonomy, Challenges and Research Prospects vol. 33 pp. 2973\u20133009. Springer ???. https:\/\/doi.org\/10.1007\/s00521-020-05151-8","DOI":"10.1007\/s00521-020-05151-8"},{"issue":"1","key":"16438_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-020-00391-w","volume":"8","author":"A El Hannani","year":"2021","unstructured":"El Hannani A, Errattahi R, Salmam FZ, Hain T, Ouahmane H (2021) Evaluation of the effectiveness and efficiency of state-of-the-art features and models for automatic speech recognition error detection. Journal of Big Data 8(1):1\u201316. https:\/\/doi.org\/10.1186\/s40537-020-00391-w","journal-title":"Journal of Big Data"},{"issue":"33\u201334","key":"16438_CR30","doi-asserted-by":"publisher","first-page":"24013","DOI":"10.1007\/s11042-019-08293-7","volume":"79","author":"SA El-Moneim","year":"2020","unstructured":"El-Moneim SA, Nassar MA, Dessouky MI, Ismail NA, El-Fishawy AS, Abd El-Samie FE (2020) Text-independent speaker recognition using LSTM-RNN and speech enhancement. Multimedia Tools and Applications 79(33\u201334):24013\u201324028. https:\/\/doi.org\/10.1007\/s11042-019-08293-7","journal-title":"Multimedia Tools and Applications"},{"key":"16438_CR31","doi-asserted-by":"publisher","unstructured":"Patel H, Thakkar A, Pandya M, Makwana K (2018) Neural network with deep learning architectures. J Inf Optim Sci39(1): 31\u201338. https:\/\/doi.org\/10.1080\/02522667.2017.1372908","DOI":"10.1080\/02522667.2017.1372908"},{"issue":"3","key":"16438_CR32","doi-asserted-by":"publisher","first-page":"563","DOI":"10.1007\/s10772-017-9427-z","volume":"20","author":"H Frihia","year":"2017","unstructured":"Frihia H, Bahi H (2017) HMM\/SVM segmentation and labelling of Arabic speech for speech recognition applications. International Journal of Speech Technology 20(3):563\u2013573. https:\/\/doi.org\/10.1007\/s10772-017-9427-z","journal-title":"International Journal of Speech Technology"},{"key":"16438_CR33","doi-asserted-by":"publisher","unstructured":"Khan A, Sohail A, Zahoora U, Qureshi AS (2020) A Survey of the Recent Architectures of Deep Convolutional Neural Networks vol. 53 pp. 5455\u20135516. Springer ???. https:\/\/doi.org\/10.1007\/s10462-020-09825-6","DOI":"10.1007\/s10462-020-09825-6"},{"key":"16438_CR34","doi-asserted-by":"publisher","unstructured":"Garain A, Singh PK, Sarkar R (2021) FuzzyGCP: A deep learning architecture for automatic spoken language identification from speech signals. Expert Systems with Applications 168:1\u201314. https:\/\/doi.org\/10.1016\/j.eswa.2020.114416","DOI":"10.1016\/j.eswa.2020.114416"},{"issue":"6","key":"16438_CR35","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1049\/iet-spr.2014.0109","volume":"9","author":"YH Goh","year":"2015","unstructured":"Goh YH, Raveendran P, Goh YL (2015) Robust speech recognition system using bidirectional Kalman filter. IET Signal Processing 9(6):491\u2013497. https:\/\/doi.org\/10.1049\/iet-spr.2014.0109","journal-title":"IET Signal Processing"},{"issue":"3","key":"16438_CR36","doi-asserted-by":"publisher","first-page":"1177","DOI":"10.1007\/s00034-017-0598-2","volume":"37","author":"R Golda Brunet","year":"2018","unstructured":"Golda Brunet R, Hema Murthy A (2018) Transcription Correction Using Group Delay Processing for Continuous Speech Recognition. Circuits, Systems, and Signal Processing 37(3):1177\u20131202. https:\/\/doi.org\/10.1007\/s00034-017-0598-2","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"16438_CR37","doi-asserted-by":"publisher","unstructured":"Zhu T, Cheng C (2020) Joint CTC-Attention End-to-End Speech Recognition with a Triangle Recurrent Neural Network Encoder. Journal of Shanghai Jiaotong University (Science) 25(1): 70\u201375. https:\/\/doi.org\/10.1007\/s12204-019-2147-6","DOI":"10.1007\/s12204-019-2147-6"},{"issue":"2","key":"16438_CR38","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1049\/iet-spr.2018.5131","volume":"13","author":"A Guerid","year":"2019","unstructured":"Guerid A, Houacine A (2019) Recognition of isolated digits using DNN-HMM and harmonic noise model. IET Signal Processing 13(2):207\u2013214. https:\/\/doi.org\/10.1049\/iet-spr.2018.5131","journal-title":"IET Signal Processing"},{"key":"16438_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2020.101077","volume":"63","author":"P Gurunath Shivakumar","year":"2020","unstructured":"Gurunath Shivakumar P, Georgiou P (2020) Transfer learning from adult to children for speech recognition: Evaluation, analysis and recommendations. Computer Speech and Language 63:1\u201321. https:\/\/doi.org\/10.1016\/j.csl.2020.101077","journal-title":"Computer Speech and Language"},{"key":"16438_CR40","doi-asserted-by":"publisher","unstructured":"Donkers T, Loepp B, Ziegler J (2017) Sequential User-based Recurrent Neural Network Recommendations. In: Proceedings of the Eleventh ACM Conference on Recommender Systems pp. 152\u2013160. ACM New York, NY, USA. https:\/\/doi.org\/10.1145\/3109859.3109877","DOI":"10.1145\/3109859.3109877"},{"key":"16438_CR41","doi-asserted-by":"publisher","unstructured":"Kang J, Zhang W-Q, Liu W-W, Liu J, Johnson MT (2018) Advanced recurrent network-based hybrid acoustic models for low resource speech recognition. EURASIP Journal on Audio, Speech, and Music Processing 6(1): 1\u201315. https:\/\/doi.org\/10.1186\/s13636-018-0128-6","DOI":"10.1186\/s13636-018-0128-6"},{"key":"16438_CR42","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1186\/s13636-020-0170-z","volume":"1","author":"J Hou","year":"2020","unstructured":"Hou J, Guo W, Song Y (2020) Dai L-R (2020) Segment boundary detection directed attention for online end-to-end speech recognition. EURASIP Journal on Audio, Speech, and Music Processing 1:3. https:\/\/doi.org\/10.1186\/s13636-020-0170-z","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"16438_CR43","doi-asserted-by":"publisher","unstructured":"CHENG G, LI X, YAN Y (2019) Using Highway Connections to Enable Deep Small-footprint LSTM-RNNs for Speech Recognition. Chin J Electron 28(1): 107\u2013112. https:\/\/doi.org\/10.1049\/cje.2018.11.008","DOI":"10.1049\/cje.2018.11.008"},{"key":"16438_CR44","doi-asserted-by":"publisher","unstructured":"Ayo FE, Folorunso O, Ibharalu FT, Osinuga IA (2020) Machine learning techniques for hate speech classification of twitter data: State-of-The-Art, future challenges and research directions. Computer Science Review 38 1\u201334. https:\/\/doi.org\/10.1016\/j.cosrev.2020.100311","DOI":"10.1016\/j.cosrev.2020.100311"},{"key":"16438_CR45","doi-asserted-by":"publisher","unstructured":"Jahangir R, Teh YW, Hanif F, Mujtaba G (2021) Deep learning approaches for speech emotion recognition: State of the art and research challenges. Multimedia Tools and Applications 1\u201366. https:\/\/doi.org\/10.1007\/s11042-020-09874-7","DOI":"10.1007\/s11042-020-09874-7"},{"issue":"4","key":"16438_CR46","doi-asserted-by":"publisher","first-page":"799","DOI":"10.1007\/s10772-020-09690-2","volume":"23","author":"K Jermsittiparsert","year":"2020","unstructured":"Jermsittiparsert K, Abdurrahman A, Siriattakul P, Sundeeva LA, Hashim W, Rahim R, Maseleno A (2020) Pattern recognition and features selection for speech emotion recognition model using deep learning. International Journal of Speech Technology 23(4):799\u2013806. https:\/\/doi.org\/10.1007\/s10772-020-09690-2","journal-title":"International Journal of Speech Technology"},{"issue":"9","key":"16438_CR47","doi-asserted-by":"publisher","first-page":"2000262","DOI":"10.1002\/admt.202000262","volume":"5","author":"Y Jin","year":"2020","unstructured":"Jin Y, Wen B, Gu Z, Jiang X, Shu X, Zeng Z, Zhang Y, Guo Z, Chen Y, Zheng T, Yue Y, Zhang H, Ding H (2020) Deep-Learning-Enabled MXene-Based Artificial Throat: Toward Sound Detection and Speech Recognition. Advanced Materials Technologies 5(9):2000262. https:\/\/doi.org\/10.1002\/admt.202000262","journal-title":"Advanced Materials Technologies"},{"issue":"5","key":"16438_CR48","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1080\/03772063.2017.1369370","volume":"64","author":"V Kadyan","year":"2018","unstructured":"Kadyan V, Mantri A, Aggarwal RK (2018) Refinement of HMM Model Parameters for Punjabi Automatic Speech Recognition (PASR) System. IETE Journal of Research 64(5):1\u201316. https:\/\/doi.org\/10.1080\/03772063.2017.1369370","journal-title":"IETE Journal of Research"},{"issue":"2","key":"16438_CR49","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1007\/s10772-021-09814-2","volume":"24","author":"V Kadyan","year":"2021","unstructured":"Kadyan V, Dua M, Dhiman P (2021) Enhancing accuracy of long contextual dependencies for Punjabi speech recognition system using deep LSTM. International Journal of Speech Technology 24(2):517\u2013527. https:\/\/doi.org\/10.1007\/s10772-021-09814-2","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"16438_CR50","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/s10772-018-09580-8","volume":"22","author":"M Kalamani","year":"2019","unstructured":"Kalamani M, Krishnamoorthi M, Valarmathi RS (2019) Continuous Tamil Speech Recognition technique under non stationary noisy environments. International Journal of Speech Technology 22(1):47\u201358. https:\/\/doi.org\/10.1007\/s10772-018-09580-8","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"16438_CR51","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13636-018-0128-6","volume":"6","author":"J Kang","year":"2018","unstructured":"Kang J, Zhang W-Q, Liu W-W, Liu J, Johnson MT (2018) Advanced recurrent network-based hybrid acoustic models for low resource speech recognition. EURASIP Journal on Audio, Speech, and Music Processing 6(1):1\u201315. https:\/\/doi.org\/10.1186\/s13636-018-0128-6","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"16438_CR52","doi-asserted-by":"publisher","unstructured":"Wang J (2020) Speech recognition in English cultural promotion via recurrent neural network. Pers Ubiquit Comput 24(2): 237\u2013246. https:\/\/doi.org\/10.1007\/s00779-019-01293-2","DOI":"10.1007\/s00779-019-01293-2"},{"key":"16438_CR53","doi-asserted-by":"publisher","unstructured":"Watanabe S, Hori T, Karita S, Hayashi T, Nishitoba J, Unno Y, Enrique Yalta Soplin N, Heymann J, Wiesner M, Chen N, Renduchintala A, Ochiai T (2018) ESPnet: End-to-End Speech Processing Toolkit. In: Interspeech 2018 pp. 2207\u20132211. ISCA ISCA. https:\/\/doi.org\/10.21437\/Interspeech.2018-1456. http:\/\/arxiv.org\/abs\/1804.00015http:\/\/www.isca-speech.org\/archive\/Interspeech_2018\/abstracts\/1456.html","DOI":"10.21437\/Interspeech.2018-1456"},{"issue":"6","key":"16438_CR54","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1080\/17549507.2018.1510033","volume":"20","author":"J Keshet","year":"2018","unstructured":"Keshet J (2018) Automatic speech recognition: A primer for speech-language pathology researchers. International Journal of Speech-Language Pathology 20(6):599\u2013609. https:\/\/doi.org\/10.1080\/17549507.2018.1510033","journal-title":"International Journal of Speech-Language Pathology"},{"key":"16438_CR55","doi-asserted-by":"publisher","unstructured":"Bang J.-U, Yun S, Kim S-H, Choi M-Y, Lee M-K, Kim Y-J, Kim D-H, Park J, Lee Y-J, Kim S-H (2020) KsponSpeech: Korean Spontaneous Speech Corpus for Automatic Speech Recognition. Applied Sciences 10(19): 1\u201317. https:\/\/doi.org\/10.3390\/app10196936","DOI":"10.3390\/app10196936"},{"issue":"1","key":"16438_CR56","doi-asserted-by":"publisher","first-page":"109","DOI":"10.4218\/etrij.2017-0087","volume":"41","author":"D Kim","year":"2019","unstructured":"Kim D, Kim S (2019) Fast speaker adaptation using extended diagonal linear transformation for deep neural networks. ETRI Journal 41(1):109\u2013116. https:\/\/doi.org\/10.4218\/etrij.2017-0087","journal-title":"ETRI Journal"},{"key":"16438_CR57","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.simpa.2021.100054","volume":"7","author":"S Kim","year":"2021","unstructured":"Kim S, Bae S, Won C (2021) Open-source toolkit for end-to-end Korean speech recognition. Software Impacts 7:1\u20134. https:\/\/doi.org\/10.1016\/j.simpa.2021.100054","journal-title":"Software Impacts"},{"key":"16438_CR58","doi-asserted-by":"publisher","unstructured":"Han Z, Zhao H, Wang R (2019) Transfer Learning for Speech Emotion Recognition. In: 2019 IEEE 5th Intl Conference on Big Data Security on Cloud (BigDataSecurity), IEEE Intl Conference on High Performance and Smart Computing, (HPSC) and IEEE Intl Conference on Intelligent Data and Security (IDS) pp. 96\u201399. IEEE ???. https:\/\/doi.org\/10.1109\/BigDataSecurity-HPSC-IDS.2019.00027. https:\/\/ieeexplore.ieee.org\/document\/8818976\/","DOI":"10.1109\/BigDataSecurity-HPSC-IDS.2019.00027"},{"issue":"5","key":"16438_CR59","doi-asserted-by":"publisher","first-page":"858","DOI":"10.1134\/S0005117917050083","volume":"78","author":"IS Kipyatkova","year":"2017","unstructured":"Kipyatkova IS, Karpov AA (2017) A study of neural network Russian language models for automatic continuous speech recognition systems. Autom Remote Control 78(5):858\u2013867. https:\/\/doi.org\/10.1134\/S0005117917050083","journal-title":"Autom Remote Control"},{"issue":"1","key":"16438_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13636-020-00193-1","volume":"2021","author":"N Kitaoka","year":"2021","unstructured":"Kitaoka N, Chen B, Obashi Y (2021) Dynamic out-of-vocabulary word registration to language model for speech recognition. Eurasip Journal on Audio, Speech, and Music Processing 2021(1):1\u20138. https:\/\/doi.org\/10.1186\/s13636-020-00193-1","journal-title":"Eurasip Journal on Audio, Speech, and Music Processing"},{"issue":"2","key":"16438_CR61","doi-asserted-by":"publisher","first-page":"1617","DOI":"10.1007\/s00500-020-05248-1","volume":"25","author":"Y Kumar","year":"2021","unstructured":"Kumar Y, Singh N, Kumar M, Singh A (2021) AutoSSR: an efficient approach for automatic spontaneous speech recognition model for the Punjabi Language. Soft Computing 25(2):1617\u20131630. https:\/\/doi.org\/10.1007\/s00500-020-05248-1","journal-title":"Soft Computing"},{"key":"16438_CR62","doi-asserted-by":"publisher","unstructured":"Song Z (2020) English speech recognition based on deep learning with multiple features. Computing 102(3): 663\u2013682. https:\/\/doi.org\/10.1007\/s00607-019-00753-0","DOI":"10.1007\/s00607-019-00753-0"},{"key":"16438_CR63","doi-asserted-by":"publisher","unstructured":"T\u00f3th L (2015) Phone recognition with hierarchical convolutional deep maxout networks. EURASIP Journal on Audio, Speech, and Music Processing 25(1): 1\u201313. https:\/\/doi.org\/10.1186\/s13636-015-0068-3","DOI":"10.1186\/s13636-015-0068-3"},{"key":"16438_CR64","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.heares.2016.10.004","volume":"349","author":"CG Le Prell","year":"2017","unstructured":"Le Prell CG, Clavier OH (2017) Effects of noise on speech recognition: Challenges for communication by service members. Hearing Research 349:76\u201389. https:\/\/doi.org\/10.1016\/j.heares.2016.10.004","journal-title":"Hearing Research"},{"issue":"23","key":"16438_CR65","doi-asserted-by":"publisher","first-page":"24917","DOI":"10.1007\/s11042-016-4122-7","volume":"76","author":"S Lee","year":"2017","unstructured":"Lee S, Chang JH (2017) Spectral difference for statistical model-based speech enhancement in speech recognition. Multimedia Tools and Applications 76(23):24917\u201324929. https:\/\/doi.org\/10.1007\/s11042-016-4122-7","journal-title":"Multimedia Tools and Applications"},{"issue":"2","key":"16438_CR66","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/s10772-021-09807-1","volume":"24","author":"KR Lekshmi","year":"2021","unstructured":"Lekshmi KR, Sherly E (2021) An acoustic model and linguistic analysis for Malayalam disyllabic words: a low resource language. International Journal of Speech Technology 24(2):483\u2013495. https:\/\/doi.org\/10.1007\/s10772-021-09807-1","journal-title":"International Journal of Speech Technology"},{"key":"16438_CR67","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1016\/j.neucom.2020.11.025","volume":"428","author":"Z Li","year":"2021","unstructured":"Li Z, Ming Y, Yang L, Xue J-H (2021) Mutual-learning sequence-level knowledge distillation for automatic speech recognition. Neurocomputing 428:259\u2013267. https:\/\/doi.org\/10.1016\/j.neucom.2020.11.025","journal-title":"Neurocomputing"},{"key":"16438_CR68","doi-asserted-by":"publisher","unstructured":"Passricha V, Aggarwal RK (2020) A comparative analysis of pooling strategies for convolutional neural network based Hindi ASR. Journal of Ambient Intelligence and Humanized Computing 11(2): 675\u2013691. https:\/\/doi.org\/10.1007\/s12652-019-01325-y","DOI":"10.1007\/s12652-019-01325-y"},{"key":"16438_CR69","doi-asserted-by":"publisher","unstructured":"Cai M, Liu J (2016) Maxout neurons for deep convolutional and LSTM neural networks in speech recognition. Speech Communication 77, 53\u201364. https:\/\/doi.org\/10.1016\/j.specom.2015.12.003","DOI":"10.1016\/j.specom.2015.12.003"},{"key":"16438_CR70","doi-asserted-by":"publisher","unstructured":"Bingol MC, Aydogmus O (2020) Performing predefined tasks using the human-robot interaction on speech recognition for an industrial robot. Eng Appl Artif Intell 95(August): 103903. https:\/\/doi.org\/10.1016\/j.engappai.2020.103903","DOI":"10.1016\/j.engappai.2020.103903"},{"issue":"4","key":"16438_CR71","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/cogs.12823","volume":"44","author":"JS Magnuson","year":"2020","unstructured":"Magnuson JS, You H, Luthra S, Li M, Nam H, Escab\u00ed M, Brown K, Allopenna PD, Theodore RM, Monto N, Rueckl JG (2020) EARSHOT: A Minimal Neural Network Model of Incremental Human Speech Recognition. Cognitive Science 44(4):1\u201317. https:\/\/doi.org\/10.1111\/cogs.12823","journal-title":"Cognitive Science"},{"key":"16438_CR72","doi-asserted-by":"publisher","unstructured":"Zia T, Zahid U (2019) Long short-term memory recurrent neural network architectures for Urdu acoustic modeling. International Journal of Speech Technology 22(1): 21\u201330. https:\/\/doi.org\/10.1007\/s10772-018-09573-7","DOI":"10.1007\/s10772-018-09573-7"},{"key":"16438_CR73","doi-asserted-by":"publisher","unstructured":"Zia T, Zahid U (2019) Long short-term memory recurrent neural network architectures for Urdu acoustic modeling. International Journal of Speech Technology 22(1): 21\u201330. https:\/\/doi.org\/10.1007\/s10772-018-09573-7","DOI":"10.1007\/s10772-018-09573-7"},{"key":"16438_CR74","doi-asserted-by":"publisher","unstructured":"Zhang Y, Zhang P, Yan Y (2019) Language Model Score Regularization for Speech Recognition. Chin J Electron 28(3): 604\u2013609. https:\/\/doi.org\/10.1049\/cje.2019.03.015","DOI":"10.1049\/cje.2019.03.015"},{"key":"16438_CR75","doi-asserted-by":"publisher","unstructured":"Hou J, Guo W, Song Y, Dai L-R (2020) Segment boundary detection directed attention for online end-to-end speech recognition. EURASIP Journal on Audio, Speech, and Music Processing 2020(1): 3. https:\/\/doi.org\/10.1186\/s13636-020-0170-z","DOI":"10.1186\/s13636-020-0170-z"},{"issue":"8","key":"16438_CR76","doi-asserted-by":"publisher","first-page":"3406","DOI":"10.1007\/s00034-019-01157-3","volume":"38","author":"T Ogunfunmi","year":"2019","unstructured":"Ogunfunmi T, Ramachandran RP, Togneri R, Zhao Y, Xia X (2019) A Primer on Deep Learning Architectures and Applications in Speech Processing. Circuits, Systems, and Signal Processing 38(8):3406\u20133432. https:\/\/doi.org\/10.1007\/s00034-019-01157-3","journal-title":"Circuits, Systems, and Signal Processing"},{"issue":"1","key":"16438_CR77","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41598-022-12260-y","volume":"12","author":"M Orken","year":"2022","unstructured":"Orken M, Dina O, Keylan A, Tolganay T, Mohamed O (2022) A study of transformer-based end-to-end speech recognition system for Kazakh language. Scientific Reports 12(1):1\u201311. https:\/\/doi.org\/10.1038\/s41598-022-12260-y","journal-title":"Scientific Reports"},{"issue":"4","key":"16438_CR78","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1080\/02564602.2015.1010611","volume":"32","author":"J Padmanabhan","year":"2015","unstructured":"Padmanabhan J, Johnson Premkumar MJ (2015) Machine Learning in Automatic Speech Recognition: A Survey. IETE Technical Review 32(4):240\u2013251. https:\/\/doi.org\/10.1080\/02564602.2015.1010611","journal-title":"IETE Technical Review"},{"issue":"January","key":"16438_CR79","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/j.specom.2019.01.004","volume":"108","author":"D Palaz","year":"2019","unstructured":"Palaz D, Magimai-Doss M, Collobert R (2019) End-to-end acoustic modeling using convolutional neural networks for HMM-based automatic speech recognition. Speech Communication 108(January):15\u201332. https:\/\/doi.org\/10.1016\/j.specom.2019.01.004","journal-title":"Speech Communication"},{"key":"16438_CR80","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1016\/j.neucom.2019.11.021","volume":"380","author":"H Pan","year":"2020","unstructured":"Pan H, Niu X, Li R, Dou Y, Jiang H (2020) Annealed gradient descent for deep learning. Neurocomputing 380:201\u2013211. https:\/\/doi.org\/10.1016\/j.neucom.2019.11.021","journal-title":"Neurocomputing"},{"issue":"3","key":"16438_CR81","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1007\/s10772-018-09584-4","volume":"22","author":"V Passricha","year":"2019","unstructured":"Passricha V, Aggarwal RK (2019) Convolutional support vector machines for speech recognition. International Journal of Speech Technology 22(3):601\u2013609. https:\/\/doi.org\/10.1007\/s10772-018-09584-4","journal-title":"International Journal of Speech Technology"},{"issue":"2","key":"16438_CR82","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1007\/s12652-019-01325-y","volume":"11","author":"V Passricha","year":"2020","unstructured":"Passricha V, Aggarwal RK (2020) A comparative analysis of pooling strategies for convolutional neural network based Hindi ASR. Journal of Ambient Intelligence and Humanized Computing 11(2):675\u2013691. https:\/\/doi.org\/10.1007\/s12652-019-01325-y","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"key":"16438_CR83","doi-asserted-by":"publisher","unstructured":"Ravanelli M, Omologo M (2018) Automatic context window composition for distant speech recognition. Speech Communication 101, 34\u201344. https:\/\/doi.org\/10.1016\/j.specom.2018.05.001arXiv:1805.10498","DOI":"10.1016\/j.specom.2018.05.001"},{"issue":"1","key":"16438_CR84","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1080\/02522667.2017.1372908","volume":"39","author":"H Patel","year":"2018","unstructured":"Patel H, Thakkar A, Pandya M, Makwana K (2018) Neural network with deep learning architectures. J Inf Optim Sci 39(1):31\u201338. https:\/\/doi.org\/10.1080\/02522667.2017.1372908","journal-title":"J Inf Optim Sci"},{"issue":"10","key":"16438_CR85","doi-asserted-by":"publisher","first-page":"15563","DOI":"10.1007\/s11042-020-10329-2","volume":"80","author":"MD Pawar","year":"2021","unstructured":"Pawar MD, Kokate RD (2021) Convolution neural network based automatic speech emotion recognition using Mel-frequency Cepstrum coefficients. Multimedia Tools and Applications 80(10):15563\u201315587. https:\/\/doi.org\/10.1007\/s11042-020-10329-2","journal-title":"Multimedia Tools and Applications"},{"key":"16438_CR86","doi-asserted-by":"publisher","unstructured":"Li R, Wang X, Mallidi SH, Watanabe S, Hori T, Hermansky H (2020) Multi-Stream End-to-End Speech Recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 28(8): 646\u2013655 arXiv:1906.08041. https:\/\/doi.org\/10.1109\/TASLP.2019.2959721","DOI":"10.1109\/TASLP.2019.2959721"},{"key":"16438_CR87","doi-asserted-by":"publisher","unstructured":"Yoon JW, Woo BJ, Ahn S, Lee H, Kim NS (2022) Inter-KD: Intermediate Knowledge Distillation for CTC-Based Automatic Speech Recognition. In: 2022 IEEE Spoken Language Technology Workshop (SLT) pp. 280\u2013286. IEEE ???. https:\/\/doi.org\/10.1109\/SLT54892.2023.10022581. https:\/\/ieeexplore.ieee.org\/document\/10022581\/","DOI":"10.1109\/SLT54892.2023.10022581"},{"key":"16438_CR88","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2020.101103","volume":"64","author":"G Pironkov","year":"2020","unstructured":"Pironkov G, Wood SU, Dupont S (2020) Hybrid-task learning for robust automatic speech recognition. Computer Speech and Language 64:101103. https:\/\/doi.org\/10.1016\/j.csl.2020.101103","journal-title":"Computer Speech and Language"},{"issue":"1","key":"16438_CR89","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1007\/s00034-019-01189-9","volume":"39","author":"PS Praveen Kumar","year":"2020","unstructured":"Praveen Kumar PS, Thimmaraja Yadava G, Jayanna HS (2020) Continuous Kannada Speech Recognition System Under Degraded Condition. Circuits, Systems, and Signal Processing 39(1):391\u2013419. https:\/\/doi.org\/10.1007\/s00034-019-01189-9","journal-title":"Circuits, Systems, and Signal Processing"},{"issue":"January","key":"16438_CR90","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.specom.2019.08.006","volume":"114","author":"Y Qian","year":"2019","unstructured":"Qian Y, Hu H, Tan T (2019) Data augmentation using generative adversarial networks for robust speech recognition. Speech Communication 114(January):1\u20139. https:\/\/doi.org\/10.1016\/j.specom.2019.08.006","journal-title":"Speech Communication"},{"key":"16438_CR91","doi-asserted-by":"publisher","unstructured":"Zoughi T, Homayounpour MM (2019) A Gender-Aware Deep Neural Network Structure for Speech Recognition. Iranian Journal of Science and Technology, Transactions of Electrical Engineering 43(3): 1\u201310. https:\/\/doi.org\/10.1007\/s40998-019-00177-8","DOI":"10.1007\/s40998-019-00177-8"},{"key":"16438_CR92","doi-asserted-by":"publisher","unstructured":"Praveen Kumar PS, Thimmaraja Yadava G, Jayanna HS (2020) Continuous Kannada Speech Recognition System Under Degraded Condition. Circuits, Systems, and Signal Processing 39(1): 391\u2013419. https:\/\/doi.org\/10.1007\/s00034-019-01189-9","DOI":"10.1007\/s00034-019-01189-9"},{"key":"16438_CR93","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1186\/s13636-018-0141-9","volume":"1","author":"C-X Qin","year":"2018","unstructured":"Qin C-X, Qu D (2018) Zhang L-H (2018) Towards end-to-end speech recognition with transfer learning. EURASIP Journal on Audio, Speech, and Music Processing 1:18. https:\/\/doi.org\/10.1186\/s13636-018-0141-9","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"16438_CR94","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1186\/s13636-021-00199-3","volume":"1","author":"K Radzikowski","year":"2021","unstructured":"Radzikowski K, Wang L, Yoshie O (2021) Nowak R (2021) Accent modification for speech recognition of non-native speakers using neural style transfer. EURASIP Journal on Audio, Speech, and Music Processing 1:11. https:\/\/doi.org\/10.1186\/s13636-021-00199-3","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"16438_CR95","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1016\/j.dsp.2018.06.004","volume":"82","author":"MH Rahmani","year":"2018","unstructured":"Rahmani MH, Almasganj F, Seyyedsalehi SA (2018) Audio-visual feature fusion via deep neural networks for automatic speech recognition. Digital Signal Processing: A Review Journal 82:54\u201363. https:\/\/doi.org\/10.1016\/j.dsp.2018.06.004","journal-title":"Digital Signal Processing: A Review Journal"},{"issue":"2","key":"16438_CR96","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/s10772-020-09687-x","volume":"23","author":"S Rajendran","year":"2020","unstructured":"Rajendran S, Jayagopal P (2020) Preserving learnability and intelligibility at the point of care with assimilation of different speech recognition techniques. International Journal of Speech Technology 23(2):265\u2013276. https:\/\/doi.org\/10.1007\/s10772-020-09687-x","journal-title":"International Journal of Speech Technology"},{"key":"16438_CR97","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2019.101057","volume":"62","author":"PB Ramteke","year":"2020","unstructured":"Ramteke PB, Supanekar S, Koolagudi SG (2020) Classification of aspirated and unaspirated sounds in speech using excitation and signal level information. Computer Speech and Language 62:1\u201318. https:\/\/doi.org\/10.1016\/j.csl.2019.101057","journal-title":"Computer Speech and Language"},{"issue":"2","key":"16438_CR98","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1109\/TETCI.2017.2762739","volume":"2","author":"M Ravanelli","year":"2018","unstructured":"Ravanelli M, Brakel P, Omologo M, Bengio Y (2018) Light Gated Recurrent Units for Speech Recognition. IEEE Transactions on Emerging Topics in Computational Intelligence 2(2):92\u2013102. https:\/\/doi.org\/10.1109\/TETCI.2017.2762739","journal-title":"IEEE Transactions on Emerging Topics in Computational Intelligence"},{"key":"16438_CR99","doi-asserted-by":"publisher","unstructured":"Garain A, Singh PK, Sarkar R (2021) FuzzyGCP: A deep learning architecture for automatic spoken language identification from speech signals. Expert Systems with Applications 168(June 2020): 1\u201314. https:\/\/doi.org\/10.1016\/j.eswa.2020.114416","DOI":"10.1016\/j.eswa.2020.114416"},{"issue":"8","key":"16438_CR100","doi-asserted-by":"publisher","first-page":"3501","DOI":"10.1007\/s00034-019-01130-0","volume":"38","author":"A Sabzi Shahrebabaki","year":"2019","unstructured":"Sabzi Shahrebabaki A, Imran AS, Olfati N, Svendsen T (2019) A Comparative Study of Deep Learning Techniques on Frame-Level Speech Data Classification. Circuits, Systems, and Signal Processing 38(8):3501\u20133520. https:\/\/doi.org\/10.1007\/s00034-019-01130-0","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"16438_CR101","doi-asserted-by":"publisher","unstructured":"Li Z, Ming Y, Yang L, Xue J-H (2021) Mutual-learning sequence-level knowledge distillation for automatic speech recognition. Neurocomputing 428, 259\u2013267. https:\/\/doi.org\/10.1016\/j.neucom.2020.11.025","DOI":"10.1016\/j.neucom.2020.11.025"},{"key":"16438_CR102","doi-asserted-by":"publisher","unstructured":"Tong R, Wang L, Ma B (2017) Transfer learning for children\u2019s speech recognition. In: 2017 International Conference on Asian Language Processing (IALP) vol. 2018-Janua pp. 36\u201339. IEEE ???. https:\/\/doi.org\/10.1109\/IALP.2017.8300540. http:\/\/ieeexplore.ieee.org\/document\/8300540\/","DOI":"10.1109\/IALP.2017.8300540"},{"issue":"4","key":"16438_CR103","doi-asserted-by":"publisher","first-page":"1008","DOI":"10.1002\/cae.21952","volume":"26","author":"RR Saifan","year":"2018","unstructured":"Saifan RR, Dweik W, Abdel-Majeed M (2018) A machine learning based deaf assistance digital system. Comput Appl Eng Educ 26(4):1008\u20131019. https:\/\/doi.org\/10.1002\/cae.21952","journal-title":"Comput Appl Eng Educ"},{"key":"16438_CR104","doi-asserted-by":"publisher","unstructured":"Liu D, Mao Q, Wang Z (2020) Keyword retrieving in continuous speech using connectionist temporal classification. Journal of Ambient Intelligence and Humanized Computing (0123456789). https:\/\/doi.org\/10.1007\/s12652-020-01933-z","DOI":"10.1007\/s12652-020-01933-z"},{"key":"16438_CR105","doi-asserted-by":"publisher","unstructured":"Becerra A, de\u00a0la Rosa JI, Gonz\u00e1lez E, Pedroza AD, Escalante NI, Santos E (2020) A comparative case study of neural network training by using frame-level cost functions for automatic speech recognition purposes in Spanish. Multimedia Tools and Applications 79(27-28): 19669\u201319715. https:\/\/doi.org\/10.1007\/s11042-020-08782-0","DOI":"10.1007\/s11042-020-08782-0"},{"issue":"3","key":"16438_CR106","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1080\/02564602.2017.1293570","volume":"35","author":"BD Sarma","year":"2018","unstructured":"Sarma BD, Prasanna SRM (2018) Acoustic-Phonetic Analysis for Speech Recognition: A Review. IETE Technical Review 35(3):1\u201324. https:\/\/doi.org\/10.1080\/02564602.2017.1293570","journal-title":"IETE Technical Review"},{"key":"16438_CR107","doi-asserted-by":"publisher","unstructured":"Mikolov T, Kombrink S, Burget L, Cernocky J, Khudanpur S (2011) Extensions of recurrent neural network language model. In: 2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) pp. 5528\u20135531. IEEE ???. https:\/\/doi.org\/10.1109\/ICASSP.2011.5947611. http:\/\/ieeexplore.ieee.org\/document\/5947611\/","DOI":"10.1109\/ICASSP.2011.5947611"},{"key":"16438_CR108","doi-asserted-by":"publisher","unstructured":"Shi Y, Zhang W-Q, Liu J, Johnson MT (2013) RNN language model with word clustering and class-based output layer. EURASIP Journal on Audio, Speech, and Music Processing 2013(1): 22. https:\/\/doi.org\/10.1186\/1687-4722-2013-22","DOI":"10.1186\/1687-4722-2013-22"},{"key":"16438_CR109","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cosrev.2020.100301","volume":"38","author":"V Sharma","year":"2020","unstructured":"Sharma V, Mir RN (2020) A comprehensive and systematic look up into deep learning based object detection techniques: A review. Computer Science Review 38:1\u201329. https:\/\/doi.org\/10.1016\/j.cosrev.2020.100301","journal-title":"Computer Science Review"},{"issue":"2","key":"16438_CR110","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1007\/s40012-016-0145-5","volume":"5","author":"M Sharma","year":"2017","unstructured":"Sharma M, Sarma KK (2017) Soft computation based spectral and temporal models of linguistically motivated Assamese telephonic conversation recognition. CSI Transactions on ICT 5(2):209\u2013216. https:\/\/doi.org\/10.1007\/s40012-016-0145-5","journal-title":"CSI Transactions on ICT"},{"issue":"1","key":"16438_CR111","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1016\/j.gltp.2021.01.004","volume":"2","author":"N Sharma","year":"2021","unstructured":"Sharma N, Sharma R, Jindal N (2021) Machine Learning and Deep Learning Applications-A Vision. Global Transitions Proceedings 2(1):24\u201328. https:\/\/doi.org\/10.1016\/j.gltp.2021.01.004","journal-title":"Global Transitions Proceedings"},{"issue":"2019","key":"16438_CR112","doi-asserted-by":"publisher","first-page":"1381","DOI":"10.1016\/j.procs.2020.04.148","volume":"171","author":"R Sharmin","year":"2020","unstructured":"Sharmin R, Rahut SK, Huq MR (2020) Bengali Spoken Digit Classification: A Deep Learning Approach Using Convolutional Neural Network. Procedia Computer Science 171(2019):1381\u20131388. https:\/\/doi.org\/10.1016\/j.procs.2020.04.148","journal-title":"Procedia Computer Science"},{"key":"16438_CR113","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1186\/1687-4722-2013-22","volume":"1","author":"Y Shi","year":"2013","unstructured":"Shi Y, Zhang W-Q, Liu J (2013) Johnson MT (2013) RNN language model with word clustering and class-based output layer. EURASIP Journal on Audio, Speech, and Music Processing 1:22. https:\/\/doi.org\/10.1186\/1687-4722-2013-22","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"16438_CR114","doi-asserted-by":"publisher","unstructured":"Wellsandt S, Foosherian M, Thoben K-D (2020) Interacting with a Digital Twin using Amazon Alexa. In: Procedia Manufacturing vol. 52 pp. 4\u20138. Elsevier B.V. ???. https:\/\/doi.org\/10.1016\/j.promfg.2020.11.002. https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S2351978920321430","DOI":"10.1016\/j.promfg.2020.11.002"},{"issue":"2","key":"16438_CR115","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1080\/08874417.2016.1183423","volume":"57","author":"V Silber-Varod","year":"2017","unstructured":"Silber-Varod V, Winer A, Geri N (2017) Opening the Knowledge Dam: Speech Recognition for Video Search. J Comput Inf Syst 57(2):106\u2013111. https:\/\/doi.org\/10.1080\/08874417.2016.1183423","journal-title":"J Comput Inf Syst"},{"key":"16438_CR116","doi-asserted-by":"publisher","unstructured":"Baevski A, Mohamed A (2020) Effectiveness of Self-Supervised Pre-Training for ASR. In: ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) pp. 7694\u20137698. IEEE ???. https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9054224. https:\/\/ieeexplore.ieee.org\/document\/9054224\/","DOI":"10.1109\/ICASSP40776.2020.9054224"},{"key":"16438_CR117","doi-asserted-by":"crossref","unstructured":"Sadhu S, He D, Huang C.-W, Mallidi S.H, Wu M, Rastrow A, Stolcke A, Droppo J, Maas R: Wav2vec-C: A Self-supervised Model for Speech Representation Learning 1\u201319 (2021) arXiv:2103.08393","DOI":"10.21437\/Interspeech.2021-717"},{"key":"16438_CR118","doi-asserted-by":"publisher","unstructured":"Soh KW, Loo JHY (2020) A review of Mandarin speech recognition test materials for use in Singapore. Int J Audiol 1\u201313. https:\/\/doi.org\/10.1080\/14992027.2020.1826587","DOI":"10.1080\/14992027.2020.1826587"},{"issue":"3","key":"16438_CR119","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1007\/s00607-019-00753-0","volume":"102","author":"Z Song","year":"2020","unstructured":"Song Z (2020) English speech recognition based on deep learning with multiple features. Computing 102(3):663\u2013682. https:\/\/doi.org\/10.1007\/s00607-019-00753-0","journal-title":"Computing"},{"key":"16438_CR120","doi-asserted-by":"publisher","unstructured":"Hernandez F, Nguyen V, Ghannay S, Tomashenko N, Est\u00e9ve Y: TED-LIUM 3: Twice as Much Data and Corpus Repartition for Experiments on Speaker Adaptation. In: Lecture Notes in Computer Science (including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) pp. 198\u2013208. Springer ??? (2018). https:\/\/doi.org\/10.1007\/978-3-319-99579-3_21. http:\/\/link.springer.com\/10.1007\/978-3-319-99579-3_21","DOI":"10.1007\/978-3-319-99579-3_21"},{"key":"16438_CR121","doi-asserted-by":"publisher","unstructured":"Suresh Kumar P, Behera HS, K AK, Nayak J, Naik B, (2020) Advancement from neural networks to deep learning in software effort estimation: Perspective of two decades. Computer Science Review 38:100288. https:\/\/doi.org\/10.1016\/j.cosrev.2020.100288","DOI":"10.1016\/j.cosrev.2020.100288"},{"issue":"2","key":"16438_CR122","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1007\/s10772-021-09811-5","volume":"24","author":"B Syiem","year":"2021","unstructured":"Syiem B, Singh LJ (2021) Exploring end-to-end framework towards Khasi speech recognition system. International Journal of Speech Technology 24(2):419\u2013424. https:\/\/doi.org\/10.1007\/s10772-021-09811-5","journal-title":"International Journal of Speech Technology"},{"key":"16438_CR123","doi-asserted-by":"publisher","unstructured":"Carlini N, Wagner D: Audio Adversarial Examples: Targeted Attacks on Speech-to-Text. In: 2018 IEEE Security and Privacy Workshops (SPW) pp. 1\u20137. IEEE ??? (2018). https:\/\/doi.org\/10.1109\/SPW.2018.00009. https:\/\/ieeexplore.ieee.org\/document\/8424625\/","DOI":"10.1109\/SPW.2018.00009"},{"key":"16438_CR124","doi-asserted-by":"publisher","unstructured":"Kumar Y, Singh N, Kumar M, Singh A: AutoSSR: an efficient approach for automatic spontaneous speech recognition model for the Punjabi Language. Soft Computing 25(2): 1617\u20131630 (2021). https:\/\/doi.org\/10.1007\/s00500-020-05248-1","DOI":"10.1007\/s00500-020-05248-1"},{"issue":"1","key":"16438_CR125","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13636-015-0068-3","volume":"25","author":"L T\u00f3th","year":"2015","unstructured":"T\u00f3th L (2015) Phone recognition with hierarchical convolutional deep maxout networks. EURASIP Journal on Audio, Speech, and Music Processing 25(1):1\u201313. https:\/\/doi.org\/10.1186\/s13636-015-0068-3","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"issue":"3","key":"16438_CR126","doi-asserted-by":"publisher","first-page":"489","DOI":"10.1007\/s10772-017-9483-4","volume":"21","author":"K Tripathi","year":"2018","unstructured":"Tripathi K, Rao KS (2018) Improvement of phone recognition accuracy using speech mode classification. International Journal of Speech Technology 21(3):489\u2013500. https:\/\/doi.org\/10.1007\/s10772-017-9483-4","journal-title":"International Journal of Speech Technology"},{"key":"16438_CR127","unstructured":"Caranica A, Cucu H, Buzo A, Burileanu C: On the design of an automatic speech recognition system for Romanian language. Control Engineering and Applied Informatics 18(2): 65\u201376 (2016)"},{"issue":"7","key":"16438_CR128","doi-asserted-by":"publisher","first-page":"963","DOI":"10.1007\/s11265-017-1295-x","volume":"90","author":"Y-H Tu","year":"2018","unstructured":"Tu Y-H, Du J, Lee C-H (2018) A Speaker-Dependent Approach to Single-Channel Joint Speech Separation and Acoustic Modeling Based on Deep Neural Networks for Robust Recognition of Multi-Talker Speech. Journal of Signal Processing Systems 90(7):963\u2013973. https:\/\/doi.org\/10.1007\/s11265-017-1295-x","journal-title":"Journal of Signal Processing Systems"},{"key":"16438_CR129","doi-asserted-by":"publisher","unstructured":"Tu Y-H, Du J, Sun L, Ma F, Wang H-K, Chen J-D, Lee C-H (2019) An iterative mask estimation approach to deep learning based multi-channel speech recognition. Speech Communication 106 (2018):31\u201343. https:\/\/doi.org\/10.1016\/j.specom.2018.11.005","DOI":"10.1016\/j.specom.2018.11.005"},{"issue":"1","key":"16438_CR130","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13634-015-0278-y","volume":"92","author":"Y Ueda","year":"2015","unstructured":"Ueda Y, Wang L, Kai A, Ren B (2015) Environment-dependent denoising autoencoder for distant-talking speech recognition. EURASIP Journal on Advances in Signal Processing 92(1):1\u201311. https:\/\/doi.org\/10.1186\/s13634-015-0278-y","journal-title":"EURASIP Journal on Advances in Signal Processing"},{"issue":"4","key":"16438_CR131","doi-asserted-by":"publisher","first-page":"2495","DOI":"10.1007\/s10462-020-09907-5","volume":"54","author":"S Uma Maheswari","year":"2021","unstructured":"Uma Maheswari S, Shahina A, Nayeemulla Khan A (2021) Understanding Lombard speech: a review of compensation techniques towards improving speech based recognition systems. Artif Intell Rev 54(4):2495\u20132523. https:\/\/doi.org\/10.1007\/s10462-020-09907-5","journal-title":"Artif Intell Rev"},{"issue":"4","key":"16438_CR132","doi-asserted-by":"publisher","first-page":"893","DOI":"10.1007\/s10772-020-09768-x","volume":"23","author":"H Veisi","year":"2020","unstructured":"Veisi H, Haji Mani A (2020) Persian speech recognition using deep learning. International Journal of Speech Technology 23(4):893\u2013905. https:\/\/doi.org\/10.1007\/s10772-020-09768-x","journal-title":"International Journal of Speech Technology"},{"issue":"2","key":"16438_CR133","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1007\/s00779-019-01293-2","volume":"24","author":"J Wang","year":"2020","unstructured":"Wang J (2020) Speech recognition in English cultural promotion via recurrent neural network. Pers Ubiquit Comput 24(2):237\u2013246. https:\/\/doi.org\/10.1007\/s00779-019-01293-2","journal-title":"Pers Ubiquit Comput"},{"issue":"2","key":"16438_CR134","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.joto.2020.12.001","volume":"16","author":"X Wang","year":"2021","unstructured":"Wang X, Xu L (2021) Speech perception in noise: Masking and unmasking. J Otol 16(2):1\u201311. https:\/\/doi.org\/10.1016\/j.joto.2020.12.001","journal-title":"J Otol"},{"issue":"1","key":"16438_CR135","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1186\/s13677-020-00186-7","volume":"9","author":"Q Wang","year":"2020","unstructured":"Wang Q, Feng C, Xu Y, Zhong H, Sheng VS (2020) A novel privacy-preserving speech recognition framework using bidirectional LSTM. Journal of Cloud Computing 9(1):36. https:\/\/doi.org\/10.1186\/s13677-020-00186-7","journal-title":"Journal of Cloud Computing"},{"issue":"4","key":"16438_CR136","doi-asserted-by":"publisher","first-page":"665","DOI":"10.1080\/0952813X.2019.1672795","volume":"32","author":"D Wang","year":"2020","unstructured":"Wang D, Zhang Y, Xin J (2020) An emergent deep developmental model for auditory learning. Journal of Experimental and Theoretical Artificial Intelligence 32(4):665\u2013684. https:\/\/doi.org\/10.1080\/0952813X.2019.1672795","journal-title":"Journal of Experimental and Theoretical Artificial Intelligence"},{"key":"16438_CR137","doi-asserted-by":"publisher","unstructured":"Kang J, Zhang W.-Q, Liu W.-W, Liu J, Johnson M.T: Lattice Based Transcription Loss for End-to-End Speech Recognition. Journal of Signal Processing Systems 90(7): 1013\u20131023 (2018). https:\/\/doi.org\/10.1007\/s11265-017-1292-0","DOI":"10.1007\/s11265-017-1292-0"},{"key":"16438_CR138","doi-asserted-by":"publisher","unstructured":"Qian Y.-m, Xiang X: Binary neural networks for speech recognition. Frontiers of Information Technology and Electronic Engineering 20(5): 701\u2013715 (2019). https:\/\/doi.org\/10.1631\/FITEE.1800469","DOI":"10.1631\/FITEE.1800469"},{"issue":"2","key":"16438_CR139","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1007\/s11704-018-8030-z","volume":"14","author":"W Ying","year":"2020","unstructured":"Ying W, Zhang L, Deng H (2020) Sichuan dialect speech recognition with deep LSTM network. Frontiers of Computer Science 14(2):378\u2013387. https:\/\/doi.org\/10.1007\/s11704-018-8030-z","journal-title":"Frontiers of Computer Science"},{"key":"16438_CR140","doi-asserted-by":"publisher","unstructured":"Qian Y, Hu H, Tan T (2019) Data augmentation using generative adversarial networks for robust speech recognition. Speech Communication 114(January): 1\u20139. https:\/\/doi.org\/10.1016\/j.specom.2019.08.006","DOI":"10.1016\/j.specom.2019.08.006"},{"key":"16438_CR141","doi-asserted-by":"publisher","unstructured":"Frihia H, Bahi H (2016) Embedded Learning Segmentation Approach for Arabic Speech Recognition. In: Lecture Notes in Computer Science (including Subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) vol. 9924 LNCS pp. 383\u2013390. Springer ???. https:\/\/doi.org\/10.1007\/978-3-319-45510-5_44. http:\/\/link.springer.com\/10.1007\/978-3-319-45510-5_44","DOI":"10.1007\/978-3-319-45510-5_44"},{"issue":"3","key":"16438_CR142","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1049\/cje.2019.03.015","volume":"28","author":"Y Zhang","year":"2019","unstructured":"Zhang Y, Zhang P, Yan Y (2019) Language Model Score Regularization for Speech Recognition. Chin J Electron 28(3):604\u2013609. https:\/\/doi.org\/10.1049\/cje.2019.03.015","journal-title":"Chin J Electron"},{"issue":"33\u201334","key":"16438_CR143","doi-asserted-by":"publisher","first-page":"24413","DOI":"10.1007\/s11042-020-09064-5","volume":"79","author":"X Zhang","year":"2020","unstructured":"Zhang X, Zhao Y, Xie J, Li C, Hu Z (2020) Geological big data acquisition based on speech recognition. Multimedia Tools and Applications 79(33\u201334):24413\u201324428. https:\/\/doi.org\/10.1007\/s11042-020-09064-5","journal-title":"Multimedia Tools and Applications"},{"key":"16438_CR144","doi-asserted-by":"publisher","unstructured":"Kim S, Bae S, Won C (2021) Open-source toolkit for end-to-end Korean speech recognition. Software Impacts 7, 1\u20134. https:\/\/doi.org\/10.1016\/j.simpa.2021.100054","DOI":"10.1016\/j.simpa.2021.100054"},{"issue":"3","key":"16438_CR145","doi-asserted-by":"publisher","first-page":"563","DOI":"10.1007\/s10772-018-9516-7","volume":"21","author":"X Zhong","year":"2018","unstructured":"Zhong X, Dai Y, Dai Y, Jin T (2018) Study on processing of wavelet speech denoising in speech recognition system. International Journal of Speech Technology 21(3):563\u2013569. https:\/\/doi.org\/10.1007\/s10772-018-9516-7","journal-title":"International Journal of Speech Technology"},{"issue":"21","key":"16438_CR146","doi-asserted-by":"publisher","first-page":"30749","DOI":"10.1007\/s11042-018-6590-4","volume":"78","author":"J Zhong","year":"2019","unstructured":"Zhong J, Zhang P, Li X (2019) Adaptive recognition of different accents conversations based on convolutional neural network. Multimedia Tools and Applications 78(21):30749\u201330767. https:\/\/doi.org\/10.1007\/s11042-018-6590-4","journal-title":"Multimedia Tools and Applications"},{"issue":"4","key":"16438_CR147","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TASLP.2015.2392944","volume":"23","author":"P Zhou","year":"2015","unstructured":"Zhou P, Jiang H, Dai L-R, Hu Y, Liu Q-F (2015) State-Clustering Based Multiple Deep Neural Networks Modeling Approach for Speech Recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 23(4):1\u201311. https:\/\/doi.org\/10.1109\/TASLP.2015.2392944","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"16438_CR148","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1007\/s12204-019-2147-6","volume":"25","author":"T Zhu","year":"2020","unstructured":"Zhu T, Cheng C (2020) Joint CTC-Attention End-to-End Speech Recognition with a Triangle Recurrent Neural Network Encoder. Journal of Shanghai Jiaotong University (Science) 25(1):70\u201375. https:\/\/doi.org\/10.1007\/s12204-019-2147-6","journal-title":"Journal of Shanghai Jiaotong University (Science)"},{"issue":"1","key":"16438_CR149","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/s10772-018-09573-7","volume":"22","author":"T Zia","year":"2019","unstructured":"Zia T, Zahid U (2019) Long short-term memory recurrent neural network architectures for Urdu acoustic modeling. International Journal of Speech Technology 22(1):21\u201330. https:\/\/doi.org\/10.1007\/s10772-018-09573-7","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"16438_CR150","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/s10772-018-09573-7","volume":"22","author":"T Zia","year":"2019","unstructured":"Zia T, Zahid U (2019) Long short-term memory recurrent neural network architectures for Urdu acoustic modeling. International Journal of Speech Technology 22(1):21\u201330. https:\/\/doi.org\/10.1007\/s10772-018-09573-7","journal-title":"International Journal of Speech Technology"},{"issue":"3","key":"16438_CR151","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s40998-019-00177-8","volume":"43","author":"T Zoughi","year":"2019","unstructured":"Zoughi T, Homayounpour MM (2019) A Gender-Aware Deep Neural Network Structure for Speech Recognition. Iranian Journal of Science and Technology, Transactions of Electrical Engineering 43(3):1\u201310. https:\/\/doi.org\/10.1007\/s40998-019-00177-8","journal-title":"Iranian Journal of Science and Technology, Transactions of Electrical Engineering"},{"key":"16438_CR152","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.eswa.2019.112840","volume":"139","author":"T Zoughi","year":"2020","unstructured":"Zoughi T, Homayounpour MM, Deypir M (2020) Adaptive windows multiple deep residual networks for speech recognition. Expert Systems with Applications 139:1\u201316. https:\/\/doi.org\/10.1016\/j.eswa.2019.112840","journal-title":"Expert Systems with Applications"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16438-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-16438-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16438-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,22]],"date-time":"2024-02-22T13:38:09Z","timestamp":1708609089000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-16438-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,15]]},"references-count":152,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2024,3]]}},"alternative-id":["16438"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-16438-y","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,15]]},"assertion":[{"value":"22 December 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 May 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 July 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 August 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest\/Competing interests"}}]}}