{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T15:56:36Z","timestamp":1774454196271,"version":"3.50.1"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032079589","type":"print"},{"value":"9783032079596","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-07959-6_6","type":"book-chapter","created":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T09:22:20Z","timestamp":1760260940000},"page":"70-84","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Arabic ASR on\u00a0the\u00a0SADA Large-Scale Arabic Speech Corpus with\u00a0Transformer-Based Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2498-6831","authenticated-orcid":false,"given":"Branislav","family":"Gerazov","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3195-5497","authenticated-orcid":false,"given":"Marcello","family":"Politi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8117-0153","authenticated-orcid":false,"given":"S\u00e9bastien","family":"Brati\u00e8res","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,13]]},"reference":[{"key":"6_CR1","unstructured":"Abdelhamid, A.A., Alsayadi, H.A., Hegazy, I., Fayed, Z.T.: End-to-end Arabic speech recognition: a review. In: Proceedings of the 19th Conference of Language Engineering, pp. 26\u201330 (2020)"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Alharbi, S., et\u00a0al.: SADA: Saudi audio dataset for Arabic. In: ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 10286\u201310290. IEEE (2024)","DOI":"10.1109\/ICASSP48485.2024.10446243"},{"key":"6_CR3","unstructured":"Ardila, R., et al.: Common voice: a massively-multilingual speech corpus. LREC (2020)"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Babu, A., Wang, C., Tjandra, A., Lakhotia, K., Xu, Q., Goyal, N., Singh, K., Von\u00a0Platen, P., Saraf, Y., Pino, J., et\u00a0al.: XLS-R: Self-supervised cross-lingual speech representation learning at scale. arXiv preprint arXiv:2111.09296 (2021)","DOI":"10.21437\/Interspeech.2022-143"},{"key":"6_CR5","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inf. Process. Syst. 33, 12449\u201312460 (2020)"},{"key":"6_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2024.103110","volume":"163","author":"FZ Besdouri","year":"2024","unstructured":"Besdouri, F.Z., Zribi, I., Belguith, L.H.: Arabic automatic speech recognition: challenges and progress. Speech Commun. 163, 103110 (2024)","journal-title":"Speech Commun."},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Chowdhury, S.A., Hussein, A., Abdelali, A., Ali, A.: Towards one model to rule all: Multilingual strategy for dialectal code-switching Arabic ASR. arXiv preprint arXiv:2105.14779 (2021)","DOI":"10.21437\/Interspeech.2021-1809"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Conneau, A., Baevski, A., Collobert, R., Mohamed, A., Auli, M.: Unsupervised cross-lingual representation learning for speech recognition. arXiv preprint arXiv:2006.13979 (2020)","DOI":"10.21437\/Interspeech.2021-329"},{"issue":"17","key":"6_CR9","doi-asserted-by":"publisher","first-page":"8898","DOI":"10.3390\/app12178898","volume":"12","author":"A Dhouib","year":"2022","unstructured":"Dhouib, A., Othman, A., Ghoul, O., Khribi, M.K., Al Sinani, A.: Arabic automatic speech recognition: a systematic literature review. Appl. Sci. 12(17), 8898 (2022)","journal-title":"Appl. Sci."},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Gulati, A., et\u00a0al.: Conformer: Convolution-augmented transformer for speech recognition. arXiv preprint arXiv:2005.08100 (2020)","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"6_CR11","unstructured":"Halabi, N., et\u00a0al.: Arabic speech corpus. Oxford Text Archive Core Collection (2016)"},{"key":"6_CR12","unstructured":"Heafield, K.: KenLM: Faster and smaller language model queries. In: Proceedings of the Sixth Workshop on Statistical Machine Translation, pp. 187\u2013197. Association for Computational Linguistics, Edinburgh (2011). https:\/\/www.aclweb.org\/anthology\/W11-2123"},{"key":"6_CR13","unstructured":"Heafield, K., Pouzyrevsky, I., Clark, J.H., Koehn, P.: Scalable modified Kneser-Ney language model estimation. In: Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics, vol. 2: Short Papers, pp. 690\u2013696. Association for Computational Linguistics, Sofia (2013). https:\/\/www.aclweb.org\/anthology\/P13-2121"},{"key":"6_CR14","unstructured":"Khan, H.I., Abid, A., Moussa, M.M., Abou-Allaban, A.: The Tarteel dataset: crowd-sourced and labeled Quranic recitation (2021)"},{"key":"6_CR15","unstructured":"Povey, D., et\u00a0al.: The Kaldi speech recognition toolkit. In: IEEE 2011 Workshop on Automatic Speech Recognition and Understanding, Hawaii, vol.\u00a01, p.\u00a05-1 (2011)"},{"key":"6_CR16","unstructured":"Pratap, V., et al.: Scaling speech technology to 1,000+ languages. arXiv (2023)"},{"key":"6_CR17","unstructured":"Radford, A., Kim, J.W., Xu, T., Brockman, G., McLeavey, C., Sutskever, I.: Robust speech recognition via large-scale weak supervision. In: International Conference on Machine Learning, pp. 28492\u201328518. PMLR (2023)"},{"issue":"10","key":"6_CR18","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1008228","volume":"16","author":"T Sainburg","year":"2020","unstructured":"Sainburg, T., Thielk, M., Gentner, T.Q.: Finding, visualizing, and quantifying latent structure across diverse animal vocal repertoires. PLoS Comput. Biol. 16(10), e1008228 (2020)","journal-title":"PLoS Comput. Biol."},{"key":"6_CR19","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"6_CR20","unstructured":"Wang, Y., Alhmoud, A., Alqurishi, M.: Open universal Arabic ASR leaderboard. arXiv preprint arXiv:2412.13788 (2024)"},{"issue":"175","key":"6_CR21","first-page":"12","volume":"3","author":"S Young","year":"2002","unstructured":"Young, S., et al.: The HTK book. Cambridge Univ. Eng. Depart. 3(175), 12 (2002)","journal-title":"Cambridge Univ. Eng. Depart."}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-07959-6_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T09:22:26Z","timestamp":1760260946000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-07959-6_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,13]]},"ISBN":["9783032079589","9783032079596"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-07959-6_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,13]]},"assertion":[{"value":"13 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Szeged","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hungary","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/specom.inf.u-szeged.hu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}