{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T03:31:13Z","timestamp":1777951873379,"version":"3.51.4"},"reference-count":25,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T00:00:00Z","timestamp":1769817600000},"content-version":"vor","delay-in-days":30,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Procedia Computer Science"],"published-print":{"date-parts":[[2026]]},"DOI":"10.1016\/j.procs.2026.01.043","type":"journal-article","created":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T19:30:19Z","timestamp":1774035019000},"page":"351-358","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Domain-Specific Adaptation of Vision-Language Models for Arabic OCR"],"prefix":"10.1016","volume":"275","author":[{"given":"Amr","family":"Elkousy","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Youssef","family":"Elasrigy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abdelrahman","family":"Ammar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohamed","family":"Ibrahim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mariam M.N.","family":"Aboelwafa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.procs.2026.01.043_bib1","unstructured":"A. Nazif, A system for the recognition of the printed arabic characters, Master\u2019s thesis, Faculty of Engineering, Cairo University (1975)."},{"key":"10.1016\/j.procs.2026.01.043_bib2","unstructured":"M. Pechwitz, V. M\u00e4rgner, Hmm-based approach for handwritten arabic word recognition using the ifn\/enit database, in:Proceedings of the International Conference on Document Analysis and Recognition (ICDAR), 2003."},{"key":"10.1016\/j.procs.2026.01.043_bib3","series-title":"Ifn\/enit-database of handwritten arabic words, in:Proc. of CIFED, Vol. 2","first-page":"127","author":"Pechwitz","year":"2002"},{"key":"10.1016\/j.procs.2026.01.043_bib4","unstructured":"University at Buffalo, Offline arabic handwriting recognition: A survey, Tech. rep., discussing IFN\/ENIT and HMM usage (2006)."},{"issue":"7","key":"10.1016\/j.procs.2026.01.043_bib5","first-page":"145","article-title":"Arabic handwriting word recognition based on convolutional recurrent neural network","volume":"13","author":"Boualam","year":"2022","journal-title":"International Journal of Advanced Computer Science and Applications"},{"key":"10.1016\/j.procs.2026.01.043_bib6","unstructured":"A. Elsayed, A. Ahmed, M. Khalifa, Arabic handwritten text recognition using advanced cnn\u2013rnn architecture, in: Proceedings of the International Conference on Artificial Intelligence and Applications, 2024."},{"issue":"3","key":"10.1016\/j.procs.2026.01.043_bib7","doi-asserted-by":"crossref","first-page":"1096","DOI":"10.1016\/j.patcog.2013.08.009","article-title":"Khatt: An open arabic offline handwritten text database","volume":"47","author":"Mahmoud","year":"2014","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.procs.2026.01.043_bib8","first-page":"107947","article-title":"Arabic handwritten alphabets","volume":"41","author":"Khan","year":"2022","journal-title":"words and paragraphs per user (ahawp) dataset, Data in Brief"},{"key":"10.1016\/j.procs.2026.01.043_bib9","doi-asserted-by":"crossref","unstructured":"S. Momeni, A. BabaAli, Transformer transducer for offline handwritten arabic text recognition, arXiv preprint arXiv:2307.15045 (2023).","DOI":"10.1007\/s11760-023-02970-9"},{"key":"10.1016\/j.procs.2026.01.043_bib10","unstructured":"M. El-Sherif, A. Hassan, A. Fathi, H. Youssef, Hatformer: Hierarchical attention transformer for historical arabic handwritten text recognition, arXiv preprint arXiv:2410.02179 (2024)."},{"key":"10.1016\/j.procs.2026.01.043_bib11","unstructured":"O. Li, Y. Zhang, F. Wei, Survey on vision\u2013language models: Methods, applications, and challenges, arXiv preprint arXiv:2401.13601 (2024)."},{"key":"10.1016\/j.procs.2026.01.043_bib12","unstructured":"OpenAI, Gpt-4 technical report, arXiv preprint arXiv:2303.08774 (2023)."},{"key":"10.1016\/j.procs.2026.01.043_bib13","unstructured":"Google DeepMind, Gemini 1.5 technical report, arXiv preprint arXiv:2403.05530 (2024)."},{"key":"10.1016\/j.procs.2026.01.043_bib14","first-page":"629","article-title":"An overview of the tesseract ocr engine","volume":"2","author":"Smith","year":"2007","journal-title":"in: Proceedings of the 9th International Conference on Document Analysis and Recognition (ICDAR)"},{"key":"10.1016\/j.procs.2026.01.043_bib15","unstructured":"JaidedAI Team, Easyocr: Ready-to-use ocr with 80+ languages supported, https:\/\/github.com\/JaidedAI\/EasyOCR (2021)."},{"key":"10.1016\/j.procs.2026.01.043_bib16","unstructured":"mssqpi, Arabic-ocr dataset, https:\/\/huggingface.co\/datasets\/mssqpi\/Arabic-OCR-Dataset (2023)."},{"key":"10.1016\/j.procs.2026.01.043_bib17","first-page":"58525","article-title":"Muharaf: Manuscripts of handwritten arabic dataset for cursive text recognition","volume":"37","author":"Saeed","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.procs.2026.01.043_bib18","doi-asserted-by":"crossref","unstructured":"A. Heakl, A. Sohail, M. Ranjan, R. Hossam, G. S. Ahmad, M. El-Geish, O. Maher, Z. Shen, F. Khan, S. Khan, Kitab-bench: A comprehensive multi-domain benchmark for arabic ocr and document understanding, arXiv preprint arXiv:2502.14949 (2025).","DOI":"10.18653\/v1\/2025.findings-acl.1135"},{"key":"10.1016\/j.procs.2026.01.043_bib19","unstructured":"Qwen Team, Qwen2.5-vl github repository, https:\/\/github.com\/QwenLM\/Qwen2.5-VL (2024)."},{"key":"10.1016\/j.procs.2026.01.043_bib20","unstructured":"A. Wasfy, O. Nacar, A. Elkhateb, M. Reda, O. Elshehy, A. Ammar, W. Boulila, Qari-ocr: High fidelity arabic text recognition through multimodal large language model adaptation, arXiv preprint arXiv:2506.02295 (2025)."},{"key":"10.1016\/j.procs.2026.01.043_bib21","unstructured":"Mistral AI, Mistral ocr: An optical character recognition api, https:\/\/mistral.ai\/news\/mistral-ocr (Mar. 2025)."},{"key":"10.1016\/j.procs.2026.01.043_bib22","unstructured":"T. Doshi, Gemini 2.5: Our most intelligent models are getting even better, Recuperado el 15 (2025)."},{"key":"10.1016\/j.procs.2026.01.043_bib23","unstructured":"aamijar, Muharaf-public dataset, https:\/\/huggingface.co\/datasets\/aamijar\/muharaf-public (2024)."},{"key":"10.1016\/j.procs.2026.01.043_bib24","unstructured":"E. J. Hu, Y. Shen, P. Wallis, Z. Allen-Zhu, Y. Li, S. Wang, L. Wang, W. Chen, Lora: Low-rank adaptation of large language models, arXiv preprint arXiv:2106.09685 (2021)."},{"key":"10.1016\/j.procs.2026.01.043_bib25","doi-asserted-by":"crossref","unstructured":"T. Dettmers, A. Pagnoni, A. Holtzman, L. Zettlemoyer, Qlora: Efficient finetuning of quantized llms, arXiv preprint arXiv:2305.14314 (2023).","DOI":"10.52202\/075280-0441"}],"container-title":["Procedia Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050926000438?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050926000438?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T11:18:05Z","timestamp":1777893485000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1877050926000438"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":25,"alternative-id":["S1877050926000438"],"URL":"https:\/\/doi.org\/10.1016\/j.procs.2026.01.043","relation":{},"ISSN":["1877-0509"],"issn-type":[{"value":"1877-0509","type":"print"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Domain-Specific Adaptation of Vision-Language Models for Arabic OCR","name":"articletitle","label":"Article Title"},{"value":"Procedia Computer Science","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.procs.2026.01.043","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}]}}