{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T06:18:02Z","timestamp":1774419482262,"version":"3.50.1"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10889643","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:02Z","timestamp":1741799702000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Open Automatic Speech Recognition Models for Classical and Modern Standard Arabic"],"prefix":"10.1109","author":[{"given":"Lilit","family":"Grigoryan","sequence":"first","affiliation":[{"name":"NVIDIA,Yerevan,Armenia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nikolay","family":"Karpov","sequence":"additional","affiliation":[{"name":"NVIDIA,Yerevan,Armenia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Enas","family":"Albasiri","sequence":"additional","affiliation":[{"name":"NVIDIA,Santa Clara,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vitaly","family":"Lavrukhin","sequence":"additional","affiliation":[{"name":"NVIDIA,Santa Clara,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Boris","family":"Ginsburg","sequence":"additional","affiliation":[{"name":"NVIDIA,Santa Clara,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Nvidia nemo: A toolkit for conversational ai","author":"Corporation","year":"2020"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICEEC.2004.1374575"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICM.2003.237884"},{"key":"ref4","first-page":"1264","article-title":"New hybrid system (supervised classifier\/hmm) for isolated arabic speech recognition","author":"Hocine","year":"2006"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078629"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.9734\/bjmcs\/2016\/23034"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-474"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2016.7846280"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s10772-021-09847-7"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-017-9402-y"},{"key":"ref11","first-page":"3057","article-title":"Development of a TV broadcasts speech recognition system for qatari Arabic","volume-title":"Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC\u201914)","author":"Elmahdy"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1142\/9789813229396_0011"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1515\/comp-2019-0004"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2019-2599"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1049\/sil2.12057"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1809"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.arabicnlp-1.10"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101272"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2016.7846277"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-647"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2014-80"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101272"},{"key":"ref24","article-title":"data2vec: A general framework for self-supervised learning in speech, vision and language","author":"Baevski","year":"2022"},{"key":"ref25","first-page":"4218","article-title":"Common voice: A massively-multilingual speech corpus","volume-title":"Proceedings of the Twelfth Language Resources and Evaluation Conference","author":"Ardila"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1044"},{"key":"ref27","article-title":"Robust speech recognition via large-scale weak supervision","author":"Radford","year":"2022"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/slt54892.2023.10023141"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.arabicnlp-1.5"},{"key":"ref30","article-title":"Mediaspeech: Multilanguage asr benchmark and dataset","author":"Kolobov","year":"2021"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/icassp40776.2020.9053889"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/SLT54892.2023.10022652"},{"key":"ref33","article-title":"Deep speech: Scaling up end-to-end speech recognition","author":"Hannun","year":"2014"},{"key":"ref34","article-title":"Kaldi speech recognition toolkit","author":"Povey","year":"2011"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389701"},{"key":"ref39","article-title":"Tarteel ai\u2019s everyayah dataset"},{"key":"ref40","article-title":"Speech data processor (sdp) toolkit","author":"Corporation","year":"2024"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1571"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24797-2"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref44","article-title":"Stt en fastconformer hybrid transducer-ctc large p&c","author":"Corporation","year":"2020"},{"key":"ref45","article-title":"Stt es fastconformer hybrid transducer-ctc large p&c","year":"2020"},{"key":"ref46","article-title":"Seamlessm4t: Massively multilingual & multimodal machine translation","year":"2023"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10889643.pdf?arnumber=10889643","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:23:03Z","timestamp":1774416183000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10889643\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10889643","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}