{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T13:36:23Z","timestamp":1773840983863,"version":"3.50.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2017,9,20]],"date-time":"2017-09-20T00:00:00Z","timestamp":1505865600000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1007\/s10772-017-9456-7","type":"journal-article","created":{"date-parts":[[2017,9,20]],"date-time":"2017-09-20T06:35:40Z","timestamp":1505889340000},"page":"937-949","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["Constructing accurate and robust HMM\/GMM models for an Arabic speech recognition system"],"prefix":"10.1007","volume":"20","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3564-9291","authenticated-orcid":false,"given":"Mohamed O. M.","family":"Khelifa","sequence":"first","affiliation":[]},{"given":"Yahya Mohamed","family":"Elhadj","sequence":"additional","affiliation":[]},{"given":"Yousfi","family":"Abdellah","sequence":"additional","affiliation":[]},{"given":"Mostafa","family":"Belkasmi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,9,20]]},"reference":[{"issue":"3","key":"9456_CR1","doi-asserted-by":"crossref","first-page":"201","DOI":"10.1109\/TASSP.1976.1162800","volume":"24","author":"B Atal","year":"1976","unstructured":"Atal, B., & Rabiner, L. (1976). A pattern recognition approach to voiced-unvoiced-silence classification with application to speech recognition. IEEE Transactions on Acoustics, Speech, and Signal Processing, 24(3), 201\u2013212.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"issue":"2","key":"9456_CR2","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1109\/TASSP.1979.1163209","volume":"27","author":"S Boll","year":"1979","unstructured":"Boll, S. (1979). Suppression of acoustic noise in speech using spectral subtraction. IEEE Transactions on Acoustics, Speech, and Signal Processing, 27(2), 113\u2013120.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9456_CR3","unstructured":"Boril, H., & Poll\u00e1k, P. (2004). Direct time domain fundamental frequency estimation of speech in noisy conditions. In Proceedings of the EUSIPCO2004, Wien, Austria (Vol. 1, pp.\u00a01003\u20131006)."},{"key":"9456_CR4","doi-asserted-by":"crossref","first-page":"1129","DOI":"10.1016\/S0003-682X(01)00007-X","volume":"62","author":"A Cherif","year":"2001","unstructured":"Cherif, A., & Dabbabi, T. (2001). Pitch detection and formants analysis of Arabic speech processing. Applied Acoustics, 62, 1129\u20131140.","journal-title":"Applied Acoustics"},{"key":"9456_CR5","volume-title":"Speech and language processing: An introduction to natural language processing, computational linguistics, and speech recognition","author":"J Daniel","year":"2008","unstructured":"Daniel, J., & James, H. (2008) Speech and language processing: An introduction to natural language processing, computational linguistics, and speech recognition. (2nd\u00a0ed.). Upper Saddle River: Prentice Hall.","edition":"2"},{"issue":"4","key":"9456_CR6","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis, S., Sants, B., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transaction on Acoustics, Speech and Signal Processing, 28(4), 357\u2013366.","journal-title":"IEEE Transaction on Acoustics, Speech and Signal Processing"},{"key":"9456_CR7","doi-asserted-by":"crossref","first-page":"341","DOI":"10.1006\/csla.2001.0171","volume":"15","author":"R Mori De","year":"2001","unstructured":"De Mori, R., Moisa, L., Gemello, R., Mana, F., & Albensano, D. (2001). Augmenting standard speech recognition features with energy gravity centres. Computer Speech and Language, 15, 341\u2013354.","journal-title":"Computer Speech and Language"},{"key":"9456_CR39","unstructured":"ElHadj, O. M. Y. et al. (2007). A manual system to segment and transcribe Arabic Speech. In Proceedings of IEEE ICSPC\u201907 (pp. 233\u2013236) Dubai, UAE."},{"key":"9456_CR40","doi-asserted-by":"crossref","unstructured":"Elhadj, O. M. Y., Alghamdi, M., & Alkanhal, M. (2013a) Approach for recognizing allophonic sounds of the classical arabic based on Quran recitations. Theory and Practice of Natural Computing, Lecture Notes in Computer Science (Vol. 8273: pp.\u00a057\u201367).","DOI":"10.1007\/978-3-642-45008-2_5"},{"key":"9456_CR41","doi-asserted-by":"crossref","first-page":"141","DOI":"10.1007\/978-3-319-01778-5_15","volume":"235","author":"OMY Elhadj","year":"2013","unstructured":"Elhadj, O. M. Y., Alghamdi, M., & Alkanhal, M. (2013b). Phoneme-based recognizer to assist reading the Holy Quran. Recent advances in intelligent informatics. Advances in Intelligent Systems and Computing, 235, 141\u2013152.","journal-title":"Advances in Intelligent Systems and Computing"},{"key":"9456_CR42","unstructured":"Elhadj, O. M. Y., Alsughayeir, I. A., Alghamdi, M., Alkanhal, M., Ohali, Y. M., & Alansari, A. M. (2012). Computerized teaching of the Holy Quran (in Arabic), Final Technical Report, King Abdulaziz City for Sciences and Technology (KACST), Riyadh, KSA."},{"issue":"5","key":"9456_CR8","first-page":"3239","volume":"11","author":"YOM Elhadj","year":"2016","unstructured":"Elhadj, Y. O. M., Khelifa, M. O. M., Yousfi, A., & Belkasmi, M. (2016). An accurate recognizer for basic arabic sounds. ARPN Journal of Engineering and Applied Sciences, 11(5), 3239\u20133243.","journal-title":"ARPN Journal of Engineering and Applied Sciences"},{"key":"9456_CR9","unstructured":"Ezzaidi, H. (2002). Discrimination Speech\/music and study of new parameters and models for a speaker identification system in the context of conference calls. (Ph.D. thesis, Chicoutimi: The University of Quebec at Chicoutimi; Department of Applied Science)."},{"key":"9456_CR10","unstructured":"Gargouri, D., Kammoun, M. A., & Hamida, A. B. (2006). A comparative study of formant frequencies estimation techniques. In Proceedings of the 5th WSEAS International Conference on Signal Processing, Istanbul, Turkey (pp. 15\u201319). May 27\u201329."},{"issue":"4","key":"9456_CR11","doi-asserted-by":"crossref","first-page":"1738","DOI":"10.1121\/1.399423","volume":"87","author":"H Hermansky","year":"1990","unstructured":"Hermansky, H. (1990). Perceptual linear predictive (PLP) analysis of speech. The Journal of the Acoustical Society of America, 87(4), 1738\u20131752.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9456_CR12","unstructured":"Hermansky, H., et al. (1991) Compensation for the effect of the communication channel in auditory-like analysis of speech (RASTAPLP). In EUROSPEECH Genova (Ed.), 1367\u20131370."},{"issue":"4","key":"9456_CR13","doi-asserted-by":"crossref","first-page":"578","DOI":"10.1109\/89.326616","volume":"2","author":"H Hermansky","year":"1994","unstructured":"Hermansky, H., & Morgan, N. (1994). RASTA of processing of speech. IEEE Transactions on Speech and Audio Processing, 2(4), 578\u2013589.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9456_CR14","doi-asserted-by":"crossref","unstructured":"Holmes, J., Holmes, W., & Garner, P. (1997). Using formant frequencies in speech recognition. In European Conference on Speech Communication and Technology, Rhodes, Greece (Vol. 4, pp. 2083\u20132086).","DOI":"10.21437\/Eurospeech.1997-551"},{"key":"9456_CR15","first-page":"155","volume-title":"New Challenges in Applied Intelligence Technologies, ser. Studies in Computational Intelligence","author":"H Iqbal","year":"2008","unstructured":"Iqbal, H., Awais, M., Masud, S., & Shamail, S. (2008). On vowels segmentation and identification using formant transitions in continuous recitation of Quranic Arabic. In New Challenges in Applied Intelligence Technologies, ser. Studies in Computational Intelligence (Vol. 134, pp.\u00a0155\u2013162). Berlin, Heidelberg: Springer."},{"key":"9456_CR16","volume-title":"A tutorial on principal components analysis. Institute for Nonlinear Science","author":"S Jonathon","year":"2005","unstructured":"Jonathon, S. (2005). A tutorial on principal components analysis. Institute for Nonlinear Science. San Diego: University of California."},{"key":"9456_CR17","volume-title":"Speech and language processing\u2014an introduction to natural language processing, computational linguistics, and speech recognition","author":"D Jurafsky","year":"2009","unstructured":"Jurafsky, D., & Martin, J. (2009). Speech and language processing\u2014an introduction to natural language processing, computational linguistics, and speech recognition. Upper Saddle River: Prentice Hall."},{"key":"9456_CR24","unstructured":"Khelifa, M. O. M., ElHadj, Y. O. M., Abdellah, Y., & Belkasmi, M. (2016). Enhancing Arabic phoneme recognizer using duration modeling techniques. In Proceedings of Fourth International Conference on Advances in Computing, Electronics and Communication\u2014ACEC Dec 15, 2016, Rome."},{"issue":"9","key":"9456_CR25","doi-asserted-by":"crossref","first-page":"35","DOI":"10.5120\/ijca2017915209","volume":"172","author":"MOM Khelifa","year":"2017","unstructured":"Khelifa, M. O. M., ElHadj, Y. O. M., Abdellah, Y., & Belkasmi, M. (2017a). Strategies for implementing an optimal ASR system for quranic recitation recognition. International Journal of Computer Applications, 172(9):35\u201341.","journal-title":"International Journal of Computer Applications,"},{"key":"9456_CR26","volume-title":"of The IEEE Ninth International conference on Advanced Computational Intelligence (ICACI 2017), Feb. 2","author":"MOM Khelifa","year":"2017","unstructured":"Khelifa, M. O. M., ElHadj, Y. O. M., Abdellah, Y., & Belkasmi, M. (2017b). An accurate HSMM-based system for Arabic phonemes recognition. In Proceedings of The IEEE Ninth International conference on Advanced Computational Intelligence (ICACI 2017), Feb. 2, Qatar: Doha."},{"key":"9456_CR27","doi-asserted-by":"publisher","DOI":"10.14569\/IJACSA.2017.080231","author":"MOM Khelifa","year":"2017","unstructured":"Khelifa, M. O. M., ElHadj, Y. O. M., Abdellah, Y., & Belkasmi, M. (2017c). Helpful statistics in recognizing basic Arabic phonemes. International Journal of Advanced Computer Science and Applications(ijacsa). doi: 10.14569\/IJACSA.2017.080231 .","journal-title":"International Journal of Advanced Computer Science and Applications(ijacsa)"},{"key":"9456_CR18","volume-title":"Extraction and representation of prosody for speaker, speech and language recognition","author":"M Leena","year":"2012","unstructured":"Leena, M. (2012). Extraction and representation of prosody for speaker, speech and language recognition. New York: Springer."},{"key":"9456_CR19","doi-asserted-by":"crossref","unstructured":"Liu, S., et al. (1998). The effect of fundamental frequency on mandarin speech recognition. In Proceedings of ICSLP, Sydney, Australia (Vol. 6).","DOI":"10.21437\/ICSLP.1998-761"},{"issue":"4","key":"9456_CR20","doi-asserted-by":"crossref","first-page":"561","DOI":"10.1109\/PROC.1975.9792","volume":"63","author":"J Makhoul","year":"1975","unstructured":"Makhoul, J., & Bolt, B. (1975). Newman, linear prediction: A tutorial review. Proceedings of IEEE, 63(4), 561\u2013580.","journal-title":"Proceedings of IEEE"},{"key":"9456_CR21","doi-asserted-by":"crossref","first-page":"782","DOI":"10.1016\/j.specom.2008.04.010","volume":"50","author":"L Mary","year":"2008","unstructured":"Mary, L., & Yegnanarayana, B. (2008). Extraction and representation of prosodic features for language and speaker recognition. Speech Communication, 50, 782\u2013796.","journal-title":"Speech Communication"},{"key":"9456_CR22","doi-asserted-by":"crossref","unstructured":"Meftah, A., Selouani, S., & Yousef, L. (2014). Preliminary Arabic speech emotion classification. In IEEE International Symposium on Signal Processing and Information Technology, Noida, India.","DOI":"10.1109\/ISSPIT.2014.7300584"},{"key":"9456_CR23","volume-title":"Wavelets: A conceptual overview.","author":"M Mitchell","year":"1994","unstructured":"Mitchell, M. (1994). Wavelets: A conceptual overview. Cambridge: Massachusetts Institute of Technology, Laboratory for Information and Decision Systems."},{"key":"9456_CR28","unstructured":"Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., et al. (2011). The Kaldi Speech Recognition Toolkit. In IEEE 2011 workshop on automatic speech recognition and understanding (No. EPFL-CONF-192584). IEEE Signal Processing Society."},{"key":"9456_CR29","doi-asserted-by":"crossref","first-page":"399","DOI":"10.1109\/TASSP.1976.1162846","volume":"24","author":"L Rabiner","year":"1976","unstructured":"Rabiner, L., et al. (1976). A comparative performance study of several pitch detection algorithms. IEEE Transactions on Acoustics, Speech, and Signal Processing, 24, 399\u2013417.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9456_CR30","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TASSP.1977.1162895","volume":"25","author":"L Rabiner","year":"1977","unstructured":"Rabiner, L. (1977). On the use of autocorrelation analysis for pitch detection. IEEE Transactions on Acoustics, Speech, and Signal Processing, 25, 1.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9456_CR31","unstructured":"Schultz, T., & Black, A. (2008). Rapid language adaptation tools and technologies for multilingual speech processing. In Proceedings of ICASSP, Las Vegas, NV."},{"key":"9456_CR48","unstructured":"Sphinx-4 Java-based Speech Recognition Engine. (2017). http:\/\/cmusphinx.sourceforge.net\/sphinx4\/ . Accessed Nov 2017."},{"key":"9456_CR32","doi-asserted-by":"crossref","unstructured":"Stuttle, M., & Gales, M. (2002). Combining a Gaussian mixture model front end with MFCC parameters. In International Conference on Spoken Language Processing, Denver, Colorado (Vol. 3, pp. 1565\u20131568).","DOI":"10.21437\/ICSLP.2002-43"},{"key":"9456_CR33","doi-asserted-by":"crossref","unstructured":"Thomson, D., & Chengalvarayan, R. (1998). Use of periodicity and jitter as speech recognition feature. In Proceedings of the 1998 IEEE International Conference on acoustics, speech, and signal processing, Seattle, WA, (Vol. 1, pp. 21\u201324).","DOI":"10.1109\/ICASSP.1998.674357"},{"issue":"3\u20134","key":"9456_CR34","doi-asserted-by":"crossref","first-page":"197","DOI":"10.1016\/S0167-6393(01)00011-5","volume":"37","author":"D Thomson","year":"2002","unstructured":"Thomson, D., & Chengalvarayan, R. (2002). Use of voicing features in HMM-based speech recognition. Speech Communication, 37(3\u20134), 197\u2013211.","journal-title":"Speech Communication"},{"issue":"1","key":"9456_CR35","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1109\/89.554264","volume":"5","author":"S Vaseghi","year":"1997","unstructured":"Vaseghi, S., & Milner, B. (1997). Noise compensation methods for Hidden Markov Model speech recognition in adverse environments. IEEE Transactions on Speech and Audio Processing, 5(1), 11\u201321.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"key":"9456_CR36","unstructured":"Weber, K., Bourlard, H., & Bengio, S. (2001). Hmm2-extraction of formant features and their use for robust ASR. In European Conference on Speech Communication and Technology (pp. 607\u2013610)."},{"key":"9456_CR37","doi-asserted-by":"crossref","unstructured":"Welling, L., & Ney, H. (1996). A model for efficient formant estimation. In IEEE international conference on acoustics, speech, and signal processing, 2, pp.\u00a0797\u2013801.","DOI":"10.1109\/ICASSP.1996.543241"},{"key":"9456_CR38","unstructured":"Wong, P., Siu, M. (2004). Decision tree based tone modeling for Chinese speech recognition. In Proceedings of ICASSP, Montreal, Canada (Vol. 1, pp. 905\u2013908)."},{"key":"9456_CR43","doi-asserted-by":"crossref","first-page":"988","DOI":"10.1109\/29.1620","volume":"36","author":"WJ Yang","year":"1988","unstructured":"Yang, W. J., et al. (1988). Hidden Markov Model for Mandarin lexical tone recognition. IEEE Transactions On Acoustics, speech, and Signal Processing, 36, 988\u2013992.","journal-title":"IEEE Transactions On Acoustics, speech, and Signal Processing,"},{"key":"9456_CR44","volume-title":"HTK Book (V.3.4)","author":"S Young","year":"2009","unstructured":"Young, S., et al. (2009). HTK Book (V.3.4). Cambridge: Cambridge University Engineering Dept."},{"key":"9456_CR45","first-page":"2","volume":"3","author":"L Yousef","year":"2010","unstructured":"Yousef, L., & Amir, H. (2010). Comparative analysis of Arabic vowels using formants and an automatic speech recognition system. International Journal of Signal Processing, Image Processing and Pattern Recognition processing and Pattern Recognition, 3, 2.","journal-title":"International Journal of Signal Processing, Image Processing and Pattern Recognition processing and Pattern Recognition"},{"issue":"4","key":"9456_CR46","doi-asserted-by":"crossref","first-page":"393","DOI":"10.1007\/s10772-011-9119-z","volume":"14","author":"B Zaineb","year":"2011","unstructured":"Zaineb, B., & Ahmed, B. (2011). Combining formant frequency based on variable order LPC coding with acoustic features for TIMIT phone recognition. International Journal of Speech Technology, 14(4), 393\u2013403.","journal-title":"International Journal of Speech Technology"},{"key":"9456_CR47","doi-asserted-by":"crossref","unstructured":"Zolnay, A., Schl\u00fcter, R., & Ney, H. (2003). Extraction methods of voicing feature for robust speech recognition. In European conference on speech communication and technology (Vol. 1, pp.\u00a0497\u2013500). Geneva.","DOI":"10.21437\/Eurospeech.2003-179"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-017-9456-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9456-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9456-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T01:58:32Z","timestamp":1659491912000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-017-9456-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,9,20]]},"references-count":48,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2017,12]]}},"alternative-id":["9456"],"URL":"https:\/\/doi.org\/10.1007\/s10772-017-9456-7","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,9,20]]}}}