{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T08:38:52Z","timestamp":1777711132644,"version":"3.51.4"},"reference-count":144,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/100009392","name":"Prince Sattam bin Abdulaziz University","doi-asserted-by":"publisher","award":["PSAU\/2023\/R\/1445"],"award-info":[{"award-number":["PSAU\/2023\/R\/1445"]}],"id":[{"id":"10.13039\/100009392","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3376237","type":"journal-article","created":{"date-parts":[[2024,3,11]],"date-time":"2024-03-11T18:13:55Z","timestamp":1710180835000},"page":"39689-39716","source":"Crossref","is-referenced-by-count":11,"title":["Arabic Speech Recognition: Advancement and Challenges"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4308-8527","authenticated-orcid":false,"given":"Ashifur","family":"Rahman","sequence":"first","affiliation":[{"name":"RIoT Research Center, Independent University, Dhaka, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9624-5499","authenticated-orcid":false,"given":"Md. Mohsin","family":"Kabir","sequence":"additional","affiliation":[{"name":"Superior Polytechnic School, University of Girona, Girona, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5738-1631","authenticated-orcid":false,"given":"M. F.","family":"Mridha","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, American International University-Bangladesh, Dhaka, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1199-9932","authenticated-orcid":false,"given":"Mohammed","family":"Alatiyyah","sequence":"additional","affiliation":[{"name":"Department of Computer Science, College of Computer Engineering and Sciences, Prince Sattam bin Abdulaziz University, Al-Kharj, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6503-2826","authenticated-orcid":false,"given":"Haifa F.","family":"Alhasson","sequence":"additional","affiliation":[{"name":"Department of Information Technology, College of Computer, Qassim University, Buraydah, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2121-0296","authenticated-orcid":false,"given":"Shuaa S.","family":"Alharbi","sequence":"additional","affiliation":[{"name":"Department of Information Technology, College of Computer, Qassim University, Buraydah, Saudi Arabia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/5.18626"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101278"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1063\/5.0094741"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCSE1.2018.8374215"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-21902-4_2"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3390\/app12178898"},{"key":"ref7","first-page":"26","article-title":"End-to-end Arabic speech recognition: A review","volume-title":"Proc. 19th Conf. Lang. Eng.","author":"Abdelhamid"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1142\/9789813229396_0001"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3177191"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1986.1169197"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078629"},{"issue":"2","key":"ref12","first-page":"11","article-title":"Comparative analysis of Arabic vowels using formants and an automatic speech recognition system","volume":"3","author":"Alotaibi","year":"2010","journal-title":"Int. J. Signal Process., Image Process. Pattern Recognit."},{"issue":"175","key":"ref13","first-page":"12","article-title":"The htk book","volume":"3","author":"Young","year":"2002","journal-title":"Cambridge Univ. Eng. Dept."},{"key":"ref14","article-title":"Speaker independent Arabic speech recognition using support vector machine","author":"El-Mashad","year":"2017"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2705720"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/SNLP.2009.5340923"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3390\/informatics8040069"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-008-9009-1"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCE.2010.5556829"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ITSIM.2010.5561391"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/AICCSA.2001.933957"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/STUDENT.2012.6408392"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-021-09847-7"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1049\/sil2.12057"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003968"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICTKE.2017.8259629"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-020-01096-5"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101869"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/I2CACIS.2019.8825004"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CSPA.2015.7225644"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2007.4430100"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1515\/comp-2019-0004"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.heliyon.2020.e03372"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/2951913.2976746"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-009-9026-8"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ISSPIT.2003.1341178"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2021.05.082"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1155\/2023\/7398538"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IRASET57153.2023.10152979"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s13369-023-07670-7"},{"key":"ref43","volume-title":"Automatic Speech Recognition","volume":"1","author":"Yu","year":"2016"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1002\/9781118922590.ch23"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101272"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.11591\/ijece.v13i1.pp400-412"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1976.10158"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/MASSP.1986.1165342"},{"key":"ref49","volume-title":"Speech Recognition Using Neural Networks","author":"Tebelskis","year":"1995"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1561\/9781601981219"},{"issue":"4","key":"ref51","first-page":"5258","article-title":"Review of algorithms and applications in speech recognition system","volume":"5","author":"Rashmi","year":"2014","journal-title":"Int. J. Comput. Sci. Inf. Technol."},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-0029-9_40"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2019.112840"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.4304\/jmm.2.5.13-18"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/j.sigpro.2004.03.004"},{"key":"ref56","first-page":"129","article-title":"Wavelet-Fourier analysis for speaker recognition","volume-title":"Proc. 17th Nat. Conf. Appl. Math. Biol. Med.","volume":"134","author":"Zi\u00f3\u0142ko"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225984"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/BF02943243"},{"key":"ref61","first-page":"135","article-title":"Speech recognition using MFCC","volume-title":"Proc. Int. Conf. Comput. Graph., Simul. Model.","volume":"9","author":"Ittichaichareon"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.21437\/Eurospeech.1997-120"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/89.326616"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2005-184"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/VECIMS.2004.1397204"},{"issue":"6","key":"ref66","first-page":"1","article-title":"Feature extraction methods LPC, PLP and MFCC in speech recognition","volume":"1","author":"Dave","year":"2013","journal-title":"Int. J. Advance Res. Eng. Technol."},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.5120\/1462-1976"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/3319619.3321951"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461935"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707742"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3210-1"},{"key":"ref72","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Graves"},{"key":"ref73","article-title":"Deep speech: Scaling up end-to-end speech recognition","author":"Hannun","year":"2014","journal-title":"arXiv:1412.5567"},{"key":"ref74","first-page":"1","article-title":"Attention-based models for speech recognition","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Chorowski"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref77","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"Chung","year":"2014","journal-title":"arXiv:1412.3555"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-491"},{"key":"ref79","first-page":"3109","article-title":"Kacst Arabic phonetic database","volume-title":"Proc. 15th Int. Congr. Phonetics Sci., Barcelona","author":"Alghmadi"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ICM.2003.237884"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/MWSCAS.2003.1562380"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICTTA.2006.1684560"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/ICCES.2008.4772985"},{"issue":"2","key":"ref84","first-page":"186","article-title":"Investigation Arabic speech recognition using CMU sphinx system","volume":"6","author":"Satori","year":"2009","journal-title":"Int. Arab J. Inf. Technol. (IAJIT)"},{"issue":"4","key":"ref85","first-page":"211","article-title":"Syllable-based automatic Arabic speech recognition in noisy-telephone channel","volume":"4","author":"Azmi","year":"2008","journal-title":"WSEAS Trans. Signal Process."},{"key":"ref86","article-title":"Mediaspeech: Multilanguage ASR benchmark and dataset","author":"Kolobov","year":"2021","journal-title":"arXiv:2103.16193"},{"key":"ref87","volume-title":"Quran Ayat Speech to Text","year":"2022"},{"key":"ref88","volume-title":"Tunisian Modern Standard Arabic","year":"2017"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3090109"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-13645-x"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(93)90095-3"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.4236\/jcc.2015.36001"},{"key":"ref94","first-page":"11","article-title":"Mel frequency cepstral coefficients for music modeling","volume-title":"Proc. ISMIR","volume":"270","author":"Logan"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/CONIELECOMP.2012.6189918"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-017-9456-7"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1121\/1.399423"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2005-138"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472808"},{"key":"ref100","article-title":"Evaluating gammatone frequency cepstral coefficients with neural networks for emotion recognition from speech","author":"Liu","year":"2018","journal-title":"arXiv:1806.09010"},{"issue":"4","key":"ref101","first-page":"467","article-title":"Class-based n-gram models of natural language","volume":"18","author":"Brown","year":"1992","journal-title":"Comput. linguistics"},{"key":"ref102","first-page":"258","article-title":"Faster and smaller n-gram language models","volume-title":"Proc. 49th Annu. Meeting Assoc. Comput. Linguistics, Human Lang. Technol.","author":"Pauls"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.4249\/scholarpedia.3881"},{"key":"ref104","article-title":"A survey on neural network language models","author":"Jing","year":"2019","journal-title":"arXiv:1906.03591"},{"key":"ref105","article-title":"ChatGPT is on the horizon: Could a large language model be suitable for intelligent traffic safety research and applications?","author":"Zheng","year":"2023","journal-title":"arXiv:2303.05382"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-2074"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2616"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1016\/S0959-440X(96)80056-X"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1038\/nbt1004-1315"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/53.54527"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.367155"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495662"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-73003-5_196"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2010.06.003"},{"key":"ref115","first-page":"1","article-title":"Cross-dialectal data transferring for Gaussian mixture model training in Arabic speech recognition","volume":"1","author":"Huang","year":"2012","journal-title":"Constraints"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.3389\/fpubh.2022.898355"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2896880"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.21608\/ejle.2020.47685.1015"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16014-1_11"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/ICEngTechnol.2017.8308186"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1145\/3109859.3109877"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/ISCIT.2004.1412458"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.2478\/jaiscr-2019-0006"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1142\/9789813229396_0011"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/MWSCAS.2017.8053243"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.51173\/jt.v5i1.749"},{"key":"ref127","article-title":"Transformers in speech processing: A survey","author":"Latif","year":"2023","journal-title":"arXiv:2303.11607"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.32604\/cmc.2023.033457"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1501\/0003168"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0048702"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/TCOMM.1994.577040"},{"key":"ref132","article-title":"Quran recitation recognition using end-to-end deep learning","author":"Al Harere","year":"2023","journal-title":"arXiv:2305.07034"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2001.0184"},{"key":"ref134","article-title":"Part 5: Machine translation evaluation","author":"Dorr","year":"2011"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48308-5_15"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2004"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/34.771314"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.21437\/Eurospeech.1997-68"},{"issue":"1","key":"ref139","first-page":"31","article-title":"Survey paper on different speech recognition algorithm: Challenges and techniques","volume":"175","author":"Vadwala","year":"2017","journal-title":"Int. J. Comput. Appl."},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-6626-9_23"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/ICICT48043.2020.9112582"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-019-09775-8"},{"key":"ref143","first-page":"12449","article-title":"Wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst. (NIPS)","author":"Baevski"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2017.2762739"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/10380310\/10466536.pdf?arnumber=10466536","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,25]],"date-time":"2024-06-25T21:10:46Z","timestamp":1719349846000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10466536\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":144,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3376237","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}