{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T15:14:09Z","timestamp":1777130049886,"version":"3.51.4"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T00:00:00Z","timestamp":1767744000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T00:00:00Z","timestamp":1767744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10772-025-10227-8","type":"journal-article","created":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T13:47:03Z","timestamp":1767793623000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Arabic speech command recognition using an enhanced CNN-LSTM model with attention and data augmentation"],"prefix":"10.1007","volume":"29","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2027-8336","authenticated-orcid":false,"given":"Naouar","family":"Laaidi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meryam","family":"Telmem","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohamed","family":"Lamrini","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7393-5726","authenticated-orcid":false,"given":"Hassan","family":"Satori","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,7]]},"reference":[{"issue":"10","key":"10227_CR1","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","volume":"22","author":"O. Abdel-Hamid","year":"2014","unstructured":"Abdel-Hamid, O., Mohamed, A. R., Jiang, H., & Penn, G. (2014). Convolutional neural networks for speech recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 22(10), 1533\u20131545.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10227_CR3","doi-asserted-by":"crossref","unstructured":"Al Dabel, M. (2022). Speech attribute detection to recognize arabic broadcast speech in industrial networks. Mobile Information Systems, 2022(1), 3732442.","DOI":"10.1155\/2022\/3732442"},{"key":"10227_CR4","doi-asserted-by":"crossref","first-page":"123","DOI":"10.1007\/s42835-022-01185-1","volume":"18","author":"A. Al-Nasheri","year":"2023","unstructured":"Al-Nasheri, A., & Al-Dossari, H. (2023). Arabic speech recognition using neural networks. Journal of Electrical Engineering & Technology, 18, 123\u2013134.","journal-title":"Journal of Electrical Engineering & Technology"},{"key":"10227_CR5","doi-asserted-by":"crossref","unstructured":"Al-Selwi, S. M., Hassan, M. F., Abdulkadir, S. J., Muneer, A., Sumiea, E. H., Alqushaibi, A., & Ragab, M. G. (2024). RNN-LSTM: From applications to modeling techniques and beyond\u2014systematic review. Journal of King Saud University-Computer and Information Sciences, 102068.","DOI":"10.1016\/j.jksuci.2024.102068"},{"key":"10227_CR6","doi-asserted-by":"crossref","unstructured":"Alim, M. A., Setumin, S., Rosli, A. D., & Ani, A. I. C. (2021, April). Development of a voice-controlled intelligent wheelchair system using raspberry pi. In 2021 IEEE 11th IEEE symposium on computer applications & industrial electronics (ISCAIE) (pp. 274\u2013278). IEEE.","DOI":"10.1109\/ISCAIE51753.2021.9431815"},{"issue":"1","key":"10227_CR7","first-page":"16","volume":"1","author":"K. Almeman","year":"2013","unstructured":"Almeman, K., & Lee, M. (2013). Automatic speech recognition technology for Arabic language: Challenges and directions. International Journal of Signal Processing Systems, 1(1), 16\u201323.","journal-title":"International Journal of Signal Processing Systems"},{"issue":"1","key":"10227_CR8","first-page":"8661","volume":"13","author":"Y. Alotaibi","year":"2023","unstructured":"Alotaibi, Y., & Alghamdi, M. (2023). Development of a deep learning-based Arabic speech recognition system for automatons. Engineering, Technology & Applied Science Research, 13(1), 8661.","journal-title":"Engineering, Technology & Applied Science Research"},{"issue":"2","key":"10227_CR9","doi-asserted-by":"publisher","first-page":"50","DOI":"10.21608\/ijicis.2021.73581.1086","volume":"21","author":"H. A. Alsayadi","year":"2021","unstructured":"Alsayadi, H. A., Abdelhamid, A. A., Hegazy, I., & Fayed, Z. T. (2021). Data augmentation for Arabic speech recognition based on end-to-end deep learning. International Journal of Intelligent Computing and Information Sciences, 21(2), 50\u201364.","journal-title":"International Journal of Intelligent Computing and Information Sciences"},{"key":"10227_CR10","unstructured":"Amodei, D., Ananthanarayanan, S., Anubhai, R., Bai, J., Battenberg, E., Case, C. \u2026 Zhu, Z. (2016). Deep speech 2: End-to-end speech recognition in English and Mandarin. In International conference on machine learning (pp. 173\u2013182). PMLR."},{"key":"10227_CR11","unstructured":"Bahdanau, D., Cho, K., & Bengio, Y. (2015). Neural machine translation by jointly learning to align and translate. In International conference on learning representations (ICLR)."},{"issue":"1","key":"10227_CR12","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/s10772-023-10023-2","volume":"26","author":"M. Barhoush","year":"2023","unstructured":"Barhoush, M., Hallawa, A., & Schmeink, A. (2023). Speaker identification and localization using shuffled MFCC features and deep learning. International Journal of Speech Technology, 26(1), 185\u2013196.","journal-title":"International Journal of Speech Technology"},{"key":"10227_CR13","unstructured":"Benamer, A., & Alkishriwo, A. (2020). Database for Arabic speech commands recognition. In CEST-2020 conference."},{"key":"10227_CR14","doi-asserted-by":"crossref","unstructured":"Bharathi, V., Renugadevi, N., Padmapriya, J., & Vijayprakash, M. (2021). Regional language recognition system for industry 4.0. In Security issues and privacy concerns in industry 4.0 Applications, (pp. 35\u201353).","DOI":"10.1002\/9781119776529.ch3"},{"key":"10227_CR15","doi-asserted-by":"publisher","first-page":"107938","DOI":"10.1016\/j.comcom.2024.107938","volume":"228","author":"S. Bini","year":"2024","unstructured":"Bini, S., Carletti, V., Saggese, A., & Vento, M. (2024). Robust speech command recognition in challenging industrial environments. Computer Communications, 228, 107938.","journal-title":"Computer Communications"},{"key":"10227_CR16","unstructured":"Chorowski, J. K., Bahdanau, D., Serdyuk, D., Cho, K., & Bengio, Y. (2015). Attention-based models for speech recognition. Advances in Neural Information Processing Systems (NeurIPS), 577\u2013585."},{"key":"10227_CR42","doi-asserted-by":"crossref","unstructured":"Davis, S., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Acoustics, Speech, and Signal Processing, 28(4), 357\u2013366.","DOI":"10.1109\/TASSP.1980.1163420"},{"key":"10227_CR17","doi-asserted-by":"publisher","first-page":"104267","DOI":"10.1016\/j.engappai.2021.104267","volume":"102","author":"A. Ghandoura","year":"2021","unstructured":"Ghandoura, A., Hjabo, F., & Al Dakkak, O. (2021). Building and benchmarking an Arabic speech commands dataset for small-footprint keyword spotting. Engineering Applications of Artificial Intelligence, 102, 104267.","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"10227_CR18","volume-title":"Deep learning","author":"I. Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., & Courville, A. (2016). Deep learning. MIT press."},{"key":"10227_CR19","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A. R., & Hinton, G. (2013). Speech recognition with deep recurrent neural networks. In IEEE international conference on acoustics, speech, and signal processing (ICASSP) (pp. 6645\u20136649).","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"10227_CR20","doi-asserted-by":"crossref","unstructured":"Gulati, A., Qin, J., Chiu, C.-C., Parmar, N., Zhang, Y., Yu, J.Wu, Y. \u2026 Wu, Y. (2020). Conformer: Convolution-augmented transformer for speech recognition. In Interspeech 2020, (pp. 5036\u20135040).","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"10227_CR21","unstructured":"Hannun, A., Case, C., Casper, J., Catanzaro, B., Diamos, G., Elsen, E.Ng, A. Y. \u2026 Ng, A. Y. (2014). Deep speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567."},{"key":"10227_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"10227_CR23","first-page":"1097","volume":"25","author":"A. Krizhevsky","year":"2012","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems, 25, 1097\u20131105.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10227_CR24","doi-asserted-by":"crossref","unstructured":"Laaidi, N., Ezzine, A., Telmem, M., Lamrini, M., & Satori, H. (2025, May). Building a contextualized Arabic voice command corpus for industrial automation systems. In 2025 5th international conference on innovative research in applied science, Engineering and Technology (IRASET) (pp. 1\u20136). IEEE.","DOI":"10.1109\/IRASET64571.2025.11008248"},{"key":"10227_CR25","doi-asserted-by":"crossref","unstructured":"Lane, N. D., Bhattacharya, S., Georgiev, P., Forlivesi, C., Jiao, L., Qian, Z., & Kawsar, F. (2015). DeepX: A software accelerator for low-power deep learning inference on mobile devices. In Proceedings of the 14th ACM international conference on information processing in sensor networks (IPSN).","DOI":"10.1109\/IPSN.2016.7460664"},{"issue":"7553","key":"10227_CR26","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y. LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., & Hinton, G. (2015). Deep learning. Nature, 521(7553), 436\u2013444.","journal-title":"Nature"},{"key":"10227_CR27","unstructured":"Lin, Z., Feng, M., Santos, C. N. D., Yu, M., Xiang, B., Zhou, B., & Bengio, Y. (2017). A structured self-attentive sentence embedding. arXiv preprint arXiv:1703.03130."},{"key":"10227_CR28","unstructured":"Liu, X. (2018). Deep convolutional and LSTM neural networks for acoustic modelling in automatic speech recognition."},{"key":"10227_CR29","doi-asserted-by":"crossref","unstructured":"Luong, M.-T., Pham, H., & Manning, C. D. (2015). Effective approaches to attention-based neural machine translation. In Conference on empirical methods in natural language processing (EMNLP) (pp. 1412\u20131421).","DOI":"10.18653\/v1\/D15-1166"},{"issue":"9","key":"10227_CR30","doi-asserted-by":"publisher","first-page":"517","DOI":"10.3390\/info15090517","volume":"15","author":"I. D. Mienye","year":"2024","unstructured":"Mienye, I. D., Swart, T. G., & Obaido, G. (2024). Recurrent neural networks: A comprehensive review of architectures, variants, and applications. Information, 15(9), 517.","journal-title":"Information"},{"issue":"1","key":"10227_CR31","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1109\/TASL.2011.2109382","volume":"20","author":"A. Mohamed","year":"2012","unstructured":"Mohamed, A., Dahl, G. E., & Hinton, G. (2012). Acoustic modeling using deep belief networks. IEEE Transactions on Audio, Speech, and Language Processing, 20(1), 14\u201322.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"10227_CR2","unstructured":"Noughreche, A., Boulouma, S., & Benbaghdad, M. (2021). Design and implementation of an automatic speech recognition based voice control system. In Conference on electrical engineering."},{"issue":"9","key":"10227_CR32","doi-asserted-by":"publisher","first-page":"385","DOI":"10.3390\/a17090385","volume":"17","author":"S. Ouali","year":"2024","unstructured":"Ouali, S., & El Garouani, S. (2024). Efficient and robust Arabic automotive speech command recognition system. Algorithms, 17(9), 385.","journal-title":"Algorithms"},{"issue":"1","key":"10227_CR33","doi-asserted-by":"publisher","first-page":"989","DOI":"10.12785\/ijcds\/150170","volume":"15","author":"N. Oukas","year":"2024","unstructured":"Oukas, N., Haboussi, S., Maiza, C., & Benslimane, N. (2024). ArabAlg: A new dataset for Arabic speech commands recognition for machine learning purposes. International Journal of Computing and Digital Systems, 15(1), 989\u20131005.","journal-title":"International Journal of Computing and Digital Systems"},{"key":"10227_CR34","doi-asserted-by":"crossref","unstructured":"Park, D. S., Chan, W., Zhang, Y., Chiu, C.-C., Zoph, B., Cubuk, E. D., & Le, Q. V. (2019). SpecAugment: A simple data augmentation method for automatic speech recognition. In Interspeech, 2019 (pp. 2613\u20132617).","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"10227_CR35","unstructured":"Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. In International conference on learning representations (ICLR)."},{"issue":"2","key":"10227_CR36","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1007\/s10772-023-10035-y","volume":"26","author":"M. Tellai","year":"2023","unstructured":"Tellai, M., Gao, L., & Mao, Q. (2023). An efficient speech emotion recognition based on a dual-stream CNN-transformer fusion network. International Journal of Speech Technology, 26(2), 541\u2013557.","journal-title":"International Journal of Speech Technology"},{"issue":"4","key":"10227_CR37","doi-asserted-by":"publisher","first-page":"1099","DOI":"10.1007\/s10772-023-10080-7","volume":"26","author":"M. Tellai","year":"2023","unstructured":"Tellai, M., & Mao, Q. (2023). CCTG-NET: Contextualized convolutional transformer-GRU network for speech emotion recognition. International Journal of Speech Technology, 26(4), 1099\u20131116.","journal-title":"International Journal of Speech Technology"},{"issue":"5","key":"10227_CR38","doi-asserted-by":"publisher","first-page":"2211","DOI":"10.1007\/s11760-022-02436-4","volume":"17","author":"A. Ustubioglu","year":"2023","unstructured":"Ustubioglu, A., Ustubioglu, B., & Ulutas, G. (2023). Mel spectrogram-based audio forgery detection using CNN. Signal, Image and Video Processing, 17(5), 2211\u20132219.","journal-title":"Signal, Image and Video Processing"},{"key":"10227_CR39","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N.Polosukhin, I. \u2026 Polosukhin, I. (2017). Attention is all you need. Advances in Neural Information Processing Systems (NeurIPS), 5998\u20136008."},{"key":"10227_CR40","volume-title":"Automatic speech recognition: A deep learning approach","author":"D. Yu","year":"2016","unstructured":"Yu, D., & Deng, L. (2016). Automatic speech recognition: A deep learning approach. Springer."},{"key":"10227_CR41","doi-asserted-by":"publisher","first-page":"108258","DOI":"10.1016\/j.apacoust.2021.108258","volume":"182","author":"T. Zhang","year":"2021","unstructured":"Zhang, T., Feng, G., Liang, J., & An, T. (2021). Acoustic scene classification based on MEL spectrogram decomposition and model merging. Applied Acoustics, 182, 108258.","journal-title":"Applied Acoustics"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10227-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-025-10227-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-025-10227-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T13:22:55Z","timestamp":1774876975000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-025-10227-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,7]]},"references-count":42,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["10227"],"URL":"https:\/\/doi.org\/10.1007\/s10772-025-10227-8","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,7]]},"assertion":[{"value":"27 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"18"}}