{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T12:14:42Z","timestamp":1767183282615},"reference-count":54,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,6,14]],"date-time":"2021-06-14T00:00:00Z","timestamp":1623628800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,6,14]],"date-time":"2021-06-14T00:00:00Z","timestamp":1623628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1007\/s10772-021-09853-9","type":"journal-article","created":{"date-parts":[[2021,6,14]],"date-time":"2021-06-14T05:02:41Z","timestamp":1623646961000},"page":"1017-1032","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["A deep learning approach for automatic speech recognition of The Holy Qur\u2019\u0101n recitations"],"prefix":"10.1007","volume":"24","author":[{"given":"Imad K.","family":"Tantawi","sequence":"first","affiliation":[]},{"given":"Mohammad A. M.","family":"Abushariah","sequence":"additional","affiliation":[]},{"given":"Bassam H.","family":"Hammo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,14]]},"reference":[{"key":"9853_CR1","unstructured":"Abudena, M. A. (2015). Proposal to encode Quranic marks used in Quran published in Libya. L2\/15-329, Complete UTC Document Register."},{"key":"9853_CR2","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s10772-017-9403-7","volume":"20","author":"MA Abushariah","year":"2017","unstructured":"Abushariah, M. A. (2017). TAMEEM V1. 0: Speakers and text independent Arabic automatic continuous speech recognizer. International Journal of Speech Technology, 20, 261\u2013280.","journal-title":"International Journal of Speech Technology"},{"key":"9853_CR3","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1007\/s10579-011-9166-8","volume":"46","author":"MA Abushariah","year":"2012","unstructured":"Abushariah, M. A., Ainon, R. N., Zainuddin, R., Elshafei, M., & Khalifa, O. O. (2012). Phonetically rich and balanced text and speech corpora for Arabic language. Language Resources and Evaluation, 46, 601\u2013634.","journal-title":"Language Resources and Evaluation"},{"key":"9853_CR4","doi-asserted-by":"crossref","unstructured":"Abushariah, M. A., Ainon, R. N., Zainuddin, R., Khalifa, O. O. & Elshafei, M. (2010). Phonetically rich and balanced Arabic speech corpus: an overview. In International conference on computer and communication engineering (ICCCE\u201910) (pp. 1\u20136). IEEE.","DOI":"10.1109\/ICCCE.2010.5556832"},{"key":"9853_CR5","doi-asserted-by":"crossref","unstructured":"Adda-Decker, M. & Lamel, L. (2006). Multilingual dictionaries. In Schultz, T., Kirchhoff, K. (Eds.), Multilingual Speech Processing (pp. 123\u2013168).","DOI":"10.1016\/B978-012088501-5\/50008-1"},{"key":"9853_CR6","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/S1319-1578(04)80010-7","volume":"16","author":"MM Al-Ghamdi","year":"2004","unstructured":"Al-Ghamdi, M. M., Al-Muhtasib, H., & Elshafei, M. (2004). Phonetic rules in arabic script. Journal of King Saud University-Computer and Information Sciences, 16, 85\u2013115.","journal-title":"Journal of King Saud University-Computer and Information Sciences"},{"key":"9853_CR7","doi-asserted-by":"crossref","unstructured":"Ali, M., Elshafei, M., Al-Ghamdi, M., Al-Muhtaseb, H. & Al-Najjar, A. (2008). Generation of Arabic phonetic dictionaries for speech recognition. In 2008 International conference on innovations in information technology (pp. 59\u201363). IEEE.","DOI":"10.1109\/INNOVATIONS.2008.4781716"},{"key":"9853_CR8","unstructured":"Al-Imam, A. A. (2006). Variant readings of the Qur\u02bcan: A critical study of their historical and linguistic origins, International Institute of Islamic Thought (IIIT)."},{"key":"9853_CR9","doi-asserted-by":"crossref","unstructured":"Allauzen, C., Riley, M., Schalkwyk, J., Skut, W. & Mohri, M. (2007). OpenFst: A general and efficient weighted finite-state transducer library. In International conference on implementation and application of automata (pp. 11\u201323). Springer.","DOI":"10.1007\/978-3-540-76336-9_3"},{"key":"9853_CR10","first-page":"571","volume":"98","author":"AAM Alqudah","year":"2020","unstructured":"Alqudah, A. A. M., Alshraideh, M. A. M., & Sharieh, A. A. S. (2020). Arabic disordered speech phonetic dictionary generator for automatic speech recognition. Journal of Theoretical and Applied Information Technology, 98, 571\u2013586.","journal-title":"Journal of Theoretical and Applied Information Technology"},{"key":"9853_CR11","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1016\/j.specom.2016.11.004","volume":"86","author":"M Alsulaiman","year":"2017","unstructured":"Alsulaiman, M., Mahmood, A., & Muhammad, G. (2017). Speaker recognition based on Arabic phonemes. Speech Communication, 86, 42\u201351.","journal-title":"Speech Communication"},{"key":"9853_CR12","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1016\/j.procs.2018.10.476","volume":"142","author":"N Alsunaidi","year":"2018","unstructured":"Alsunaidi, N., Alzeer, L., Alkatheiri, M., Habbabah, A., Alattas, M., Aljabri, M., et al. (2018). Abjad: Towards interactive learning approach to arabic reading based on speech recognition. Procedia Computer Science, 142, 198\u2013205.","journal-title":"Procedia Computer Science"},{"key":"9853_CR13","doi-asserted-by":"crossref","unstructured":"Axelrod, S., Gopinath, R., Olsen, P. & Visweswariah, K. (2003). Dimensional reduction, covariance modeling, and computational complexity in ASR systems. In 2003 IEEE international conference on acoustics, speech, and signal processing, 2003. Proceedings.(ICASSP\u201903) (pp. I\u2013I). IEEE.","DOI":"10.1109\/ICASSP.2003.1198918"},{"key":"9853_CR14","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1109\/TIT.1975.1055419","volume":"21","author":"L Bahl","year":"1975","unstructured":"Bahl, L., & Jelinek, F. (1975). Decoding for channels with insertions, deletions, and substitutions with applications to speech recognition. IEEE Transactions on Information Theory, 21, 404\u2013411.","journal-title":"IEEE Transactions on Information Theory"},{"key":"9853_CR15","unstructured":"Bellegdi, S. A. & Al-Muhtaseb, H. A. (2015). Automatic rule based phonetic transcription and syllabification for quranic text. Unpublished."},{"key":"9853_CR16","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.specom.2013.07.008","volume":"56","author":"L Besacier","year":"2014","unstructured":"Besacier, L., Barnard, E., Karpov, A., & Schultz, T. (2014). Automatic speech recognition for under-resourced languages: A survey. Speech Communication, 56, 85\u2013100.","journal-title":"Speech Communication"},{"key":"9853_CR17","doi-asserted-by":"crossref","unstructured":"Bezoui, M., Elmoutaouakkil, A. & Beni-Hssane, A. (2016). Feature extraction of some Quranic recitation using mel-frequency cepstral coefficients (MFCC). In 2016 5th international conference on multimedia computing and systems (ICMCS) (pp. 127\u2013131). IEEE.","DOI":"10.1109\/ICMCS.2016.7905619"},{"key":"9853_CR18","doi-asserted-by":"crossref","unstructured":"Braun, H., Luitjens, J. & Leary, R. (2019). GPU-accelerated Viterbi exact lattice decoder for batched online and offline speech recognition. arXiv preprint arXiv:1910.10032.","DOI":"10.1109\/ICASSP40776.2020.9054099"},{"key":"9853_CR19","unstructured":"Chodroff, E. (2018). Corpus phonetics tutorial. arXiv preprint arXiv:1811.05553."},{"key":"9853_CR20","doi-asserted-by":"crossref","unstructured":"Cosi, P. (2015). A kaldi-dnn-based asr system for italian. In 2015 international joint conference on neural networks (IJCNN) (pp. 1\u20135). IEEE.","DOI":"10.1109\/IJCNN.2015.7280336"},{"key":"9853_CR21","volume-title":"Tajweed rules of the Qur\u2019an","author":"KC Czerepinski","year":"2006","unstructured":"Czerepinski, K. C., & Swayd, A.-S. D. A. R. (2006). Tajweed rules of the Qur\u2019an. Dar Al-Khair Islamic Books Publisher."},{"key":"9853_CR22","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis, S., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Acoustics, Speech, and Signal Processing, 28, 357\u2013366.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9853_CR23","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1016\/j.eij.2016.04.002","volume":"17","author":"MY el Amrani","year":"2016","unstructured":"el Amrani, M. Y., Rahman, M. H., Wahiddin, M. R., & Shah, A. (2016). Building CMU Sphinx language model for The Holy Quran using simplified Arabic phonemes. Egyptian Informatics Journal, 17, 305\u2013314.","journal-title":"Egyptian Informatics Journal"},{"key":"9853_CR24","unstructured":"Elhadj, Y. O. M., Aoun-Allah, M., Alsughaiyer, I. A. & Alansari, A. (2012). In A. Silva & E. Pontes (Eds.), A new scientific formulation of Tajweed rules for E-learning of Quran phonological rules (p. 197)."},{"key":"9853_CR25","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1016\/j.procs.2019.12.122","volume":"163","author":"LA Elrefaei","year":"2019","unstructured":"Elrefaei, L. A., Alhassan, T. Q., & Omar, S. S. (2019). An Arabic visual dataset for visual speech recognition. Procedia Computer Science, 163, 400\u2013409.","journal-title":"Procedia Computer Science"},{"key":"9853_CR26","first-page":"565","volume":"16","author":"M Elshafei","year":"1991","unstructured":"Elshafei, M. (1991). Toward an Arabic text-to-speech system. The Arabian Journal for Science and Engineering, 16, 565\u2013583.","journal-title":"The Arabian Journal for Science and Engineering"},{"key":"9853_CR27","doi-asserted-by":"crossref","unstructured":"Erdogan, H. (2005). Regularizing linear discriminant analysis for speech recognition. In Ninth European conference on speech communication and technology.","DOI":"10.21437\/Interspeech.2005-144"},{"key":"9853_CR28","first-page":"45","volume-title":"Minimum word error rate decoding","author":"G Evermann","year":"1999","unstructured":"Evermann, G. (1999). Minimum word error rate decoding (pp. 45\u201367). Cambridge University."},{"key":"9853_CR29","doi-asserted-by":"crossref","unstructured":"Hafeez, A. H., Mohiuddin, K. & Ahmed, S. (2014). Speaker-dependent live quranic verses recitation recognition system using Sphinx-4 framework. In 17th IEEE international multi topic conference 2014 (pp. 333\u2013337). IEEE.","DOI":"10.1109\/INMIC.2014.7097361"},{"key":"9853_CR30","volume-title":"Speech and language processing: An introduction to natural language processing","author":"D Jurasky","year":"2000","unstructured":"Jurasky, D., & Martin, J. H. (2000). Speech and language processing: An introduction to natural language processing. Computational Linguistics and Speech Recognition."},{"key":"9853_CR31","unstructured":"KALDI_TEAM_CUDA. (2020). The CUDA matrix library [Online]. Retrieved April 28, 2020, from https:\/\/kaldi-asr.org\/doc\/cudamatrix.html."},{"key":"9853_CR32","unstructured":"KALDI_TEAM_MKL. (2020). External matrix libraries [Online]. Retrieved April 28, 2020, from https:\/\/kaldi-asr.org\/doc\/matrixwrap.html."},{"key":"9853_CR33","doi-asserted-by":"crossref","unstructured":"Khan, A. F. A., Mourad, O., Mannan, A. M. K. B., Dahan, H. B. A. M., & Abushariah, M. A. (2013). Automatic Arabic pronunciation scoring for computer aided language learning. 2013 1st international conference on communications, signal processing, and their applications (ICCSPA) (1\u20136). IEEE.","DOI":"10.1109\/ICCSPA.2013.6487246"},{"key":"9853_CR34","first-page":"35","volume":"172","author":"MO Khelifa","year":"2017","unstructured":"Khelifa, M. O., Elhadj, Y., Abdellah, Y., & Belkasmi, M. (2017). Strategies for implementing an optimal ASR system for quranic recitation recognition. International Journal of Computer Applications, 172, 35\u201341.","journal-title":"International Journal of Computer Applications"},{"key":"9853_CR35","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.specom.2005.01.004","volume":"46","author":"K Kirchhoff","year":"2005","unstructured":"Kirchhoff, K., & Vergyri, D. (2005). Cross-dialectal data sharing for acoustic modeling in Arabic speech recognition. Speech Communication, 46, 37\u201351.","journal-title":"Speech Communication"},{"key":"9853_CR36","unstructured":"Lawson, D. R. (2008). An evaluation of arabic transliteration methods."},{"key":"9853_CR37","unstructured":"Magre, S. B., Deshmukh, R. R. & Shrishrimal, P. P. (2013). A comparative study on feature extraction techniques in speech recognition. In International conference on recent advances in statistics and their application."},{"key":"9853_CR38","unstructured":"Mahmod, M. A. (2016). Automated quranic Tajweed checking rules system through recitation recognition: a review."},{"key":"9853_CR39","doi-asserted-by":"crossref","unstructured":"Malmasi, S. & Zampieri, M. (2017). Arabic dialect identification using iVectors and ASR transcripts. In Proceedings of the fourth workshop on NLP for similar languages, varieties and dialects (VarDial) (pp. 178\u2013183).","DOI":"10.18653\/v1\/W17-1222"},{"key":"9853_CR40","doi-asserted-by":"publisher","DOI":"10.1017\/CCOL0521831601","volume-title":"The Cambridge companion to the Qur\u2019\u0101n","author":"JD Mcauliffe","year":"2006","unstructured":"Mcauliffe, J. D. (2006). The Cambridge companion to the Qur\u2019\u0101n. Cambridge University Press."},{"key":"9853_CR41","doi-asserted-by":"crossref","unstructured":"Miao, Y., Zhang, H., & Metze, F. (2014). Towards speaker adaptive training of deep neural network acoustic models. In Fifteenth annual conference of the international speech communication association.","DOI":"10.21437\/Interspeech.2014-490"},{"key":"9853_CR42","doi-asserted-by":"crossref","unstructured":"Morris, A. C., Maier, V. & Green, P. (2004). From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition. In Eighth international conference on spoken language processing.","DOI":"10.21437\/Interspeech.2004-668"},{"key":"9853_CR43","unstructured":"Nasr, S. H., Dagli, C. K., Dakake, M. M., Lumbard, J. E. & Rustom, M. (2015). The study Quran. A new translation and commentary. New York."},{"key":"9853_CR44","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1145\/773473.178477","volume":"29","author":"RH Netzer","year":"1994","unstructured":"Netzer, R. H., & Weaver, M. H. (1994). Optimal tracing and incremental reexecution for debugging long-running programs. ACM SIGPLAN Notices, 29, 313\u2013325.","journal-title":"ACM SIGPLAN Notices"},{"key":"9853_CR45","doi-asserted-by":"crossref","unstructured":"Peddinti, V., Povey, D. & Khudanpur, S. (2015). A time delay neural network architecture for efficient modeling of long temporal contexts. In Sixteenth annual conference of the international speech communication association.","DOI":"10.21437\/Interspeech.2015-647"},{"key":"9853_CR46","unstructured":"Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., Hannemann, M., Motlicek, P., Qian, Y. & Schwarz, P. (2011). The Kaldi speech recognition toolkit. IEEE 2011 workshop on automatic speech recognition and understanding. In IEEE Signal Processing Society."},{"key":"9853_CR47","doi-asserted-by":"crossref","unstructured":"Stolcke, A. (2002). SRILM-an extensible language modeling toolkit. In Seventh international conference on spoken language processing.","DOI":"10.21437\/ICSLP.2002-303"},{"key":"9853_CR48","doi-asserted-by":"crossref","unstructured":"Tabbal, H., El Falou, W., & Monla, B. (2006). Analysis and implementation of a\u201d Quranic\u201d verses delimitation system in audio files using speech recognition techniques. In 2006 2nd international conference on information & communication technologies (pp. 2979\u20132984). IEEE.","DOI":"10.1109\/ICTTA.2006.1684889"},{"key":"9853_CR49","unstructured":"Tanzil. (2020). Tanzil documents [Online]. Retrieved May 29, 2020, from http:\/\/tanzil.net\/docs\/download."},{"key":"9853_CR50","unstructured":"Techpowerup. (2019). NVIDIA GeForce GTX 1660 Ti [Online]. Retrieved October 27, 2019, from https:\/\/www.techpowerup.com\/gpu-specs\/geforce-gtx-1660-ti.c3364."},{"key":"9853_CR51","doi-asserted-by":"crossref","unstructured":"Trabelsi, I. & Ayed, D. B. (2012). On the use of different feature extraction methods for linear and non linear kernels. In 2012 6th international conference on sciences of electronics, technologies of information and telecommunications (SETIT) (pp. 797-802). IEEE.","DOI":"10.1109\/SETIT.2012.6482016"},{"key":"9853_CR52","doi-asserted-by":"crossref","unstructured":"Yousfi, B. & Zeki, A. M. (2017). Holy Qur\u2019an speech recognition system Imaalah checking rule for warsh recitation. In 2017 IEEE 13th international colloquium on signal processing & its applications (CSPA) (pp. 258\u2013263). IEEE.","DOI":"10.1109\/CSPA.2017.8064962"},{"key":"9853_CR53","doi-asserted-by":"publisher","first-page":"36","DOI":"10.30537\/sjcms.v2i1.61","volume":"2","author":"B Yousfi","year":"2018","unstructured":"Yousfi, B., Zeki, A. M., & Haji, A. (2018). Holy Qur\u2019an speech recognition system distinguishing the type of prolongation. Sukkur IBA Journal of Computing and Mathematical Sciences, 2, 36\u201343.","journal-title":"Sukkur IBA Journal of Computing and Mathematical Sciences"},{"key":"9853_CR54","doi-asserted-by":"publisher","first-page":"3077","DOI":"10.1007\/s13369-017-2415-4","volume":"42","author":"M Zakariah","year":"2017","unstructured":"Zakariah, M., Khan, M. K., Tayan, O., & Salah, K. (2017). Digital Quran computing: review, classification, and trend analysis. Arabian Journal for Science and Engineering, 42, 3077\u20133102.","journal-title":"Arabian Journal for Science and Engineering"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09853-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-021-09853-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09853-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,31]],"date-time":"2022-12-31T00:34:05Z","timestamp":1672446845000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-021-09853-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,14]]},"references-count":54,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["9853"],"URL":"https:\/\/doi.org\/10.1007\/s10772-021-09853-9","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,14]]},"assertion":[{"value":"3 November 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 May 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 June 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}