{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T19:28:01Z","timestamp":1774639681184,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"17","license":[{"start":{"date-parts":[[2024,7,4]],"date-time":"2024-07-04T00:00:00Z","timestamp":1720051200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,7,4]],"date-time":"2024-07-04T00:00:00Z","timestamp":1720051200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-19750-3","type":"journal-article","created":{"date-parts":[[2024,7,4]],"date-time":"2024-07-04T05:01:25Z","timestamp":1720069285000},"page":"17309-17328","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Isolated word recognition based on a hyper-tuned cross-validated CNN-BiLSTM from Mel Frequency Cepstral Coefficients"],"prefix":"10.1007","volume":"84","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4485-3393","authenticated-orcid":false,"given":"Bachchu","family":"Paul","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Santanu","family":"Phadikar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Somnath","family":"Bera","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tanushree","family":"Dey","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Utpal","family":"Nandi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,7,4]]},"reference":[{"issue":"1","key":"19750_CR1","doi-asserted-by":"publisher","first-page":"27","DOI":"10.21608\/ejle.2020.47685.1015","volume":"8","author":"ER Abdelmaksoud","year":"2021","unstructured":"Abdelmaksoud ER, Hassen A, Hassan N, Hesham M (2021) Convolutional neural network for Arabic speech recognition. Egypt J Lang Eng 8(1):27\u201338","journal-title":"Egypt J Lang Eng"},{"key":"19750_CR2","doi-asserted-by":"crossref","unstructured":"Alalshekmubarak A, Smith LS (2014) On improving the classification capability of reservoir computing for Arabic speech recognition. In: International conference on artificial neural networks. Springer, Cham, pp 225\u2013232","DOI":"10.1007\/978-3-319-11179-7_29"},{"key":"19750_CR3","doi-asserted-by":"publisher","first-page":"2240227","DOI":"10.1142\/S0218348X22402277","volume":"30","author":"FS Al-Anzi","year":"2022","unstructured":"Al-Anzi FS (2022) Improved noise-resilient isolated words speech recognition using piecewise differentiation. Fractals 30:2240227","journal-title":"Fractals"},{"key":"19750_CR4","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.specom.2022.02.005","volume":"139","author":"H Aldarmaki","year":"2022","unstructured":"Aldarmaki H, Ullah A, Ram S, Zaki N (2022) Unsupervised automatic speech recognition: a review. Speech Commun 139:76\u201391","journal-title":"Speech Commun"},{"issue":"8","key":"19750_CR5","doi-asserted-by":"publisher","first-page":"521","DOI":"10.1049\/sil2.12057","volume":"15","author":"HA Alsayadi","year":"2021","unstructured":"Alsayadi HA, Abdelhamid AA, Hegazy I, Fayed ZT (2021) Arabic speech recognition using end-to-end deep learning. IET Signal Proc 15(8):521\u2013534","journal-title":"IET Signal Proc"},{"key":"19750_CR6","doi-asserted-by":"crossref","unstructured":"Amari R, Mars A, Zrigui M (2022) Arabic speech recognition based on a CNN-BLSTM combination. In: 2022 IEEE 9th International Conference on Sciences of Electronics, Technologies of Information and Telecommunications (SETIT). IEEE, pp 259\u2013264","DOI":"10.1109\/SETIT54465.2022.9875681"},{"key":"19750_CR7","doi-asserted-by":"crossref","unstructured":"Amari R, Noubigh Z, Zrigui S, Berchech D, Nicolas H, Zrigui M (2022) Deep convolutional neural network for arabic speech recognition. In: Conference on computational collective intelligence technologies and applications. Springer, Cham, pp 120\u2013134","DOI":"10.1007\/978-3-031-16014-1_11"},{"key":"19750_CR8","doi-asserted-by":"crossref","unstructured":"Bansal M, Sircar P (2021) AFM signal model for digit recognition. In: 2021 Sixth International Conference on Wireless Communications, Signal Processing and Networking (WiSPNET). IEEE, pp 354\u2013358","DOI":"10.1109\/WiSPNET51692.2021.9419416"},{"key":"19750_CR9","first-page":"85","volume":"15","author":"A Benmachiche","year":"2019","unstructured":"Benmachiche A, Makhlouf A (2019) Optimization of hidden Markov model with Gaussian mixture densities for Arabic speech recognition. WSEAS Trans Signal Process 15:85\u201395","journal-title":"WSEAS Trans Signal Process"},{"issue":"21","key":"19750_CR10","doi-asserted-by":"publisher","first-page":"29887","DOI":"10.1007\/s11042-022-12058-0","volume":"81","author":"M Bentoumi","year":"2022","unstructured":"Bentoumi M, Daoud M, Benaouali M, Taleb Ahmed A (2022) Improvement of emotion recognition from facial images using deep learning and early stopping cross validation. Multimed Tools Appl 81(21):29887\u201329917","journal-title":"Multimed Tools Appl"},{"key":"19750_CR11","doi-asserted-by":"crossref","unstructured":"Bernard M, Poli M, Karadayi J, Dupoux E (2023) Shennong: a Python toolbox for audio speech features extraction. Behav Res Methods 1\u201313","DOI":"10.3758\/s13428-022-02029-6"},{"key":"19750_CR12","doi-asserted-by":"crossref","unstructured":"Dablain D, Krawczyk B, Chawla NV (2022) DeepSMOTE: fusing deep learning and SMOTE for imbalanced data. IEEE Trans Neural Netw Learn Syst","DOI":"10.1109\/TNNLS.2021.3136503"},{"issue":"3","key":"19750_CR13","doi-asserted-by":"publisher","first-page":"28","DOI":"10.4236\/jcc.2020.83003","volume":"8","author":"HA Elharati","year":"2020","unstructured":"Elharati HA, Alshaari M, K\u00ebpuska VZ (2020) Arabic speech recognition system based on MFCC and HMMs. J Comput Commun 8(3):28\u201334","journal-title":"J Comput Commun"},{"key":"19750_CR14","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1016\/j.patcog.2017.10.013","volume":"77","author":"J Gu","year":"2018","unstructured":"Gu J, Wang Z, Kuen J, Ma L, Shahroudy A, Shuai B, Liu T et al (2018) Recent advances in convolutional neural networks. Pattern Recognit 77:354\u2013377","journal-title":"Pattern Recognit"},{"key":"19750_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.120102","volume":"225","author":"R Kumari","year":"2023","unstructured":"Kumari R, Singh J, Gosain A (2023) SmS: SMOTE-stacked hybrid model for diagnosis of polycystic ovary syndrome using feature selection method. Expert Syst Appl 225:120102","journal-title":"Expert Syst Appl"},{"issue":"23","key":"19750_CR16","doi-asserted-by":"publisher","first-page":"17351","DOI":"10.1007\/s00521-020-04867-x","volume":"32","author":"IE Livieris","year":"2020","unstructured":"Livieris IE, Pintelas E, Pintelas P (2020) A CNN\u2013LSTM model for gold price time-series forecasting. Neural Comput Appl 32(23):17351\u201317360","journal-title":"Neural Comput Appl"},{"key":"19750_CR17","unstructured":"Lyons J, Wang DYB, Shteingart GH, Mavrinac E, Gaurkar Y, Watcharawisetkul W, \u2026 Stark A (2020) jameslyons\/python_speech_features: release v0. 6.1 (Version 0.6. 1). Zenodo"},{"key":"19750_CR18","first-page":"12","volume":"7","author":"H Mahalingam","year":"2019","unstructured":"Mahalingam H, Rajakumar M (2019) Speech recognition using multiscale scattering of audio signals and long short-term memory of neural networks. Int J Adv Comput Sci Cloud Comput 7:12\u201316","journal-title":"Int J Adv Comput Sci Cloud Comput"},{"key":"19750_CR19","doi-asserted-by":"crossref","unstructured":"Mehra S, Susan S (2023) Deep fusion framework for speech command recognition using acoustic and linguistic features. Multimed Tools Appl 1\u201325","DOI":"10.1007\/s11042-023-15118-1"},{"key":"19750_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijepes.2021.107563","volume":"135","author":"A Moradzadeh","year":"2022","unstructured":"Moradzadeh A, Teimourzadeh H, Mohammadi-Ivatloo B, Pourhossein K (2022) Hybrid CNN-LSTM approaches for identification of type and locations of transmission line faults. Int J Electr Power Energy Syst 135:107563","journal-title":"Int J Electr Power Energy Syst"},{"key":"19750_CR21","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1016\/j.specom.2019.01.002","volume":"107","author":"S Najnin","year":"2019","unstructured":"Najnin S, Banerjee B (2019) Speech recognition using cepstral articulatory features. Speech Commun 107:26\u201337","journal-title":"Speech Commun"},{"key":"19750_CR22","doi-asserted-by":"crossref","unstructured":"Obaid M, Hodrob R, Abu Mwais A, Aldababsa M (2023) Small vocabulary isolated-word automatic speech recognition for single-word commands in Arabic spoken. Soft Comput, 1\u201314","DOI":"10.1007\/s00500-023-07959-7"},{"key":"19750_CR23","doi-asserted-by":"publisher","first-page":"3364141","DOI":"10.1155\/2022\/3364141","volume":"2022","author":"J Oruh","year":"2022","unstructured":"Oruh J, Viriri S (2022) Deep learning-based classification of spoken english digits. Comput Intell Neurosci 2022:3364141","journal-title":"Comput Intell Neurosci"},{"issue":"3","key":"19750_CR24","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1007\/s10772-021-09847-7","volume":"24","author":"A Ouisaadane","year":"2021","unstructured":"Ouisaadane A, Safi S (2021) A comparative study for Arabic speech recognition system in noisy environments. Int J Speech Technol 24(3):761\u2013770","journal-title":"Int J Speech Technol"},{"key":"19750_CR25","doi-asserted-by":"crossref","unstructured":"Paul B, Mukherjee H, Phadikar S, Roy K (2019) MFCC-Based bangla vowel phoneme recognition from micro clips. In: International conference on intelligent computing and communication. Springer, Singapore, pp 511\u2013519","DOI":"10.1007\/978-981-15-1084-7_49"},{"key":"19750_CR26","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-15598-1","author":"B Paul","year":"2023","unstructured":"Paul B, Phadikar S (2023) A hybrid feature-extracted deep CNN with reduced parameters substitutes an end-to-end CNN for the recognition of spoken Bengali digits. Multimed Tools Appl. https:\/\/doi.org\/10.1007\/s11042-023-15598-1","journal-title":"Multimed Tools Appl"},{"key":"19750_CR27","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-13594-5","author":"B Paul","year":"2022","unstructured":"Paul B, Phadikar S (2022) A novel pre-processing technique of amplitude interpolation for enhancing the classification accuracy of Bengali phonemes. Multimed Tools Appl. https:\/\/doi.org\/10.1007\/s11042-022-13594-5","journal-title":"Multimed Tools Appl"},{"key":"19750_CR28","doi-asserted-by":"publisher","first-page":"2454","DOI":"10.1007\/s00034-023-02570-5","volume":"43","author":"B Paul","year":"2024","unstructured":"Paul B, Phadikar S (2024) RAttSR: a novel low-cost reconstructed attention-based end-to-end speech recognizer. Circuits Syst Signal Process 43:2454\u20132476. https:\/\/doi.org\/10.1007\/s00034-023-02570-5","journal-title":"Circuits Syst Signal Process"},{"key":"19750_CR29","doi-asserted-by":"crossref","unstructured":"Peng Y, Kim K, Wu F, Sridhar P, Watanabe S (2023) Structured pruning of self-supervised pre-trained models for speech recognition and understanding. In: ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 1\u20135","DOI":"10.1109\/ICASSP49357.2023.10095335"},{"key":"19750_CR30","doi-asserted-by":"crossref","unstructured":"Raghudathesh GP, Chandrakala CB, Rao D (2023) Analysis and classification of spoken utterance using feature vector statistics and machine learning algorithms. In: 2023 International Conference on Network, Multimedia and Information Technology (NMITCON). IEEE, pp 1\u20136","DOI":"10.1109\/NMITCON58196.2023.10275906"},{"key":"19750_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.116256","volume":"193","author":"S Rani","year":"2022","unstructured":"Rani S, Bashir AK, Alhudhaif A, Koundal D, Gunduz ES (2022) An efficient CNN-LSTM model for sentiment detection in# BlackLivesMatter. Expert Syst Appl 193:116256","journal-title":"Expert Syst Appl"},{"issue":"2","key":"19750_CR32","first-page":"7","volume":"3","author":"SM Redwan","year":"2023","unstructured":"Redwan SM, Rashed-Al-Mahfuz M, Hamid ME (2023) Recognizing command words using deep recurrent neural network for both acoustic and throat speech. Eur J Inf Technol Comput Sci 3(2):7\u201313","journal-title":"Eur J Inf Technol Comput Sci"},{"key":"19750_CR33","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.imavis.2018.04.004","volume":"75","author":"P Rodr\u00edguez","year":"2018","unstructured":"Rodr\u00edguez P, Bautista MA, Gonzalez J, Escalera S (2018) Beyond one-hot encoding: lower dimensional target embedding. Image Vis Comput 75:21\u201331","journal-title":"Image Vis Comput"},{"issue":"2","key":"19750_CR34","first-page":"941","volume":"9","author":"G Savitha","year":"2021","unstructured":"Savitha G (2021) Deep recurrent neural network based audio speech recognition system. Inform Technol Industry 9(2):941\u2013949","journal-title":"Inform Technol Industry"},{"issue":"4","key":"19750_CR35","doi-asserted-by":"publisher","first-page":"235","DOI":"10.2478\/jaiscr-2019-0006","volume":"9","author":"A Shewalkar","year":"2019","unstructured":"Shewalkar A (2019) Performance evaluation of deep neural networks applied to speech recognition: RNN, LSTM, and GRU. J Artif Intell Soft Comput Res 9(4):235\u2013245","journal-title":"J Artif Intell Soft Comput Res"},{"issue":"8","key":"19750_CR36","doi-asserted-by":"publisher","first-page":"21","DOI":"10.9790\/3021-04812125","volume":"4","author":"PP Singh","year":"2014","unstructured":"Singh PP, Rani P (2014) An approach to extract feature using MFCC. IOSR J Eng 4(8):21\u201325","journal-title":"IOSR J Eng"},{"key":"19750_CR37","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/j.specom.2022.03.006","volume":"140","author":"MY Tachbelie","year":"2022","unstructured":"Tachbelie MY, Abate ST, Schultz T (2022) Multilingual speech recognition for GlobalPhone languages. Speech Commun 140:71\u201386","journal-title":"Speech Commun"},{"issue":"2","key":"19750_CR38","doi-asserted-by":"publisher","first-page":"200","DOI":"10.3934\/ElectrEng.2020.2.200","volume":"4","author":"HY Vani","year":"2020","unstructured":"Vani HY, Anusuya MA (2020) Improving speech recognition using bionic wavelet features. AIMS Electron Electr Eng 4(2):200\u2013215","journal-title":"AIMS Electron Electr Eng"},{"issue":"4","key":"19750_CR39","doi-asserted-by":"publisher","first-page":"893","DOI":"10.1007\/s10772-020-09768-x","volume":"23","author":"H Veisi","year":"2020","unstructured":"Veisi H, Mani AH (2020) Persian speech recognition using deep learning. Int J Speech Technol 23(4):893\u2013905","journal-title":"Int J Speech Technol"},{"issue":"1","key":"19750_CR40","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13677-020-00186-7","volume":"9","author":"Q Wang","year":"2020","unstructured":"Wang Q, Feng C, Xu Y, Zhong H, Sheng VS (2020) A novel privacy-preserving speech recognition framework using bidirectional LSTM. J Cloud Comput 9(1):1\u201313","journal-title":"J Cloud Comput"},{"key":"19750_CR41","doi-asserted-by":"crossref","unstructured":"Wazir ASMB, Chuah JH (2019) Spoken arabic digits recognition using deep learning. In: 2019 IEEE International Conference on Automatic Control and Intelligent Systems (I2CACIS). IEEE, pp 339\u2013344","DOI":"10.1109\/I2CACIS.2019.8825004"},{"issue":"2","key":"19750_CR42","doi-asserted-by":"publisher","first-page":"472","DOI":"10.1080\/1540496X.2020.1825935","volume":"58","author":"L Yu","year":"2022","unstructured":"Yu L, Zhou R, Chen R, Lai KK (2022) Missing data preprocessing in credit classification: one-hot encoding or imputation. Emerg Mark Financ Trade 58(2):472\u2013482","journal-title":"Emerg Mark Financ Trade"},{"key":"19750_CR43","doi-asserted-by":"crossref","unstructured":"Zerari N, Abdelhamid S, Bouzgou H, Raymond C (2018) Bi-directional recurrent end-to-end neural network classifier for spoken Arab digit recognition. In: 2018 2nd International Conference on Natural Language and Speech Processing (ICNLSP). IEEE, pp 1\u20136","DOI":"10.1109\/ICNLSP.2018.8374374"},{"issue":"1","key":"19750_CR44","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1515\/comp-2019-0004","volume":"9","author":"N Zerari","year":"2019","unstructured":"Zerari N, Abdelhamid S, Bouzgou H, Raymond C (2019) Bidirectional deep architecture for Arabic speech recognition. Open Comput Sci 9(1):92\u2013102","journal-title":"Open Comput Sci"},{"key":"19750_CR45","doi-asserted-by":"crossref","unstructured":"Zhang Q, Yuan X, Lam CT (2024) Recognition of score words in freestyle kayaking using improved DTW matching. Multimed Tools Appl 1\u201325","DOI":"10.1007\/s11042-024-18383-w"},{"key":"19750_CR46","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1016\/j.bspc.2018.08.035","volume":"47","author":"J Zhao","year":"2019","unstructured":"Zhao J, Mao X, Chen L (2019) Speech emotion recognition using deep 1D & 2D CNN LSTM networks. Biomed Signal Process Control 47:312\u2013323","journal-title":"Biomed Signal Process Control"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-19750-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-19750-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-19750-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,24]],"date-time":"2025-05-24T08:40:14Z","timestamp":1748076014000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-19750-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,4]]},"references-count":46,"journal-issue":{"issue":"17","published-online":{"date-parts":[[2025,5]]}},"alternative-id":["19750"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-19750-3","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,7,4]]},"assertion":[{"value":"6 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 April 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 July 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"There is no conflict of Interest between the authors regarding the manuscript preparation and submission.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}