{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T01:04:34Z","timestamp":1759971874147,"version":"build-2065373602"},"reference-count":61,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Saudi Data and AI Authority and King Fahd University of Petroleum and Minerals through SDAIA\u2013KFUPM Joint Research Center for Artificial Intelligence","award":["JRC-AI-UCG-07"],"award-info":[{"award-number":["JRC-AI-UCG-07"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3616181","type":"journal-article","created":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T17:39:47Z","timestamp":1759253987000},"page":"171468-171492","source":"Crossref","is-referenced-by-count":0,"title":["Is This the Best Prompt? Scoring Prompts for Arabic NLP Across LLMs"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4599-8797","authenticated-orcid":false,"given":"Dania","family":"Refai","sequence":"first","affiliation":[{"name":"Information and Computer Science Department, King Fahd University of Petroleum and Minerals (KFUPM), Dhahran, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1364-086X","authenticated-orcid":false,"given":"Maged S.","family":"Al-Shaibani","sequence":"additional","affiliation":[{"name":"SDAIA\u2013KFUPM Joint Research Center for Artificial Intelligence, KFUPM, Dhahran, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8311-1731","authenticated-orcid":false,"given":"Irfan","family":"Ahmad","sequence":"additional","affiliation":[{"name":"Information and Computer Science Department, King Fahd University of Petroleum and Minerals (KFUPM), Dhahran, Saudi Arabia"}]}],"member":"263","reference":[{"key":"ref1","article-title":"GPT-4o system card","author":"Hurst","year":"2024","journal-title":"arXiv:2410.21276"},{"key":"ref2","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv:2302.13971"},{"key":"ref3","article-title":"ALLaM: Large language models for Arabic and English","author":"Bari","year":"2024","journal-title":"arXiv:2407.15390"},{"key":"ref4","article-title":"Automatic detection of LLM-generated code: A case study of claude 3 Haiku","author":"Rahman","year":"2024","journal-title":"arXiv:2409.01382"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3390\/info16080688"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3605943"},{"key":"ref7","article-title":"The prompt report: A systematic survey of prompt engineering techniques","volume-title":"arXiv:2406.06608","author":"Schulhoff","year":"2024"},{"key":"ref8","article-title":"A systematic survey of prompt engineering in large language models: Techniques and applications","author":"Sahoo","year":"2024","journal-title":"arXiv:2402.07927"},{"key":"ref9","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wei"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.147"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.106"},{"key":"ref12","article-title":"Self-consistency improves chain of thought reasoning in language models","author":"Wang","year":"2022","journal-title":"arXiv:2203.11171"},{"key":"ref13","article-title":"A survey of prompt engineering methods in large language models for different NLP tasks","author":"Vatsal","year":"2024","journal-title":"arXiv:2407.12994"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.heliyon.2024.e39786"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"ref16","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Brown"},{"issue":"8","key":"ref17","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3411763.3451760"},{"key":"ref19","first-page":"22199","article-title":"Large language models are zero-shot reasoners","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"35","author":"Kojima"},{"key":"ref20","first-page":"11809","article-title":"Tree of thoughts: Deliberate problem solving with large language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yao"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29720"},{"key":"ref22","article-title":"Complexity-based prompting for multi-step reasoning","author":"Fu","year":"2022","journal-title":"arXiv:2210.00720"},{"key":"ref23","article-title":"Least-to-most prompting enables complex reasoning in large language models","author":"Zhou","year":"2022","journal-title":"arXiv:2205.10625"},{"key":"ref24","article-title":"Think before you speak: Training language models with pause tokens","author":"Goyal","year":"2023","journal-title":"arXiv:2310.02226"},{"key":"ref25","article-title":"Automatic chain of thought prompting in large language models","author":"Zhang","year":"2022","journal-title":"arXiv:2210.03493"},{"article-title":"Large language models are human-level prompt engineers","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Zhou","key":"ref26"},{"key":"ref27","article-title":"TEMPERA: Test-time prompting via reinforcement learning","author":"Zhang","year":"2022","journal-title":"arXiv:2211.11890"},{"key":"ref28","article-title":"DSPy: Compiling declarative language model calls into self-improving pipelines","author":"Khattab","year":"2023","journal-title":"arXiv:2310.03714"},{"key":"ref29","article-title":"Large language models as optimizers","author":"Yang","year":"2023","journal-title":"arXiv:2309.03409"},{"key":"ref30","first-page":"11054","article-title":"True few-shot learning with language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Perez"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.442"},{"key":"ref32","article-title":"Zero-label prompt selection","author":"Liao","year":"2022","journal-title":"arXiv:2211.04668"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.60"},{"key":"ref34","article-title":"Automatic prompt selection for large language models","author":"Do","year":"2024","journal-title":"arXiv:2404.02717"},{"key":"ref35","article-title":"Chain-of-knowledge: Grounding large language models via dynamic knowledge adapting over heterogeneous sources","author":"Li","year":"2023","journal-title":"arXiv:2305.13269"},{"key":"ref36","article-title":"Interleaving retrieval with chain-of-thought reasoning for knowledge-intensive multi-step questions","author":"Trivedi","year":"2022","journal-title":"arXiv:2212.10509"},{"key":"ref37","article-title":"Boosted prompt ensembles for large language models","author":"Pitis","year":"2023","journal-title":"arXiv:2304.05970"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00681"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.24"},{"key":"ref40","article-title":"Few-shot learning with multilingual language models","author":"Victoria Lin","year":"2021","journal-title":"arXiv:2112.10668"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00633"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.mrl-1.1"},{"key":"ref43","article-title":"Prompting GPT-3 to be reliable","author":"Si","year":"2022","journal-title":"arXiv:2210.09150"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.eval4nlp-1.14"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642216"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.258"},{"key":"ref47","article-title":"Cross-lingual auto evaluation for assessing multilingual LLMs","author":"Doddapaneni","year":"2024","journal-title":"arXiv:2410.13394"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3689217.3690621"},{"key":"ref49","article-title":"When punctuation matters: A large-scale comparison of prompt robustness methods for LLMs","author":"Seleznyov","year":"2025","journal-title":"arXiv:2508.11383"},{"key":"ref50","article-title":"ARB: A comprehensive Arabic multimodal reasoning benchmark","author":"Ghaboura","year":"2025","journal-title":"arXiv:2505.17021"},{"key":"ref51","article-title":"AraSTEM: A native Arabic multiple choice question benchmark for evaluating LLMs knowledge in STEM subjects","author":"Mustapha","year":"2024","journal-title":"arXiv:2501.00559"},{"key":"ref52","article-title":"AraReasoner: Evaluating reasoning-based LLMs for Arabic NLP","author":"Hasanaath","year":"2025","journal-title":"arXiv:2506.08768"},{"key":"ref53","article-title":"Efficient multi-prompt evaluation of LLMs","author":"Maia Polo","year":"2024","journal-title":"arXiv:2405.17202"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-60042-0_66"},{"volume-title":"Arabic_hate_Speech: Dataset for Detecting Hate Speech in Arabic Text","year":"2024","key":"ref55"},{"volume-title":"Arcovidvac: Arabic COVID-19 Vaccine Sentiment Analysis Dataset","year":"2024","key":"ref56"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-77116-8_8"},{"key":"ref58","first-page":"1","article-title":"ArSarcasm v2: A large-scale Arabic sarcasm detection dataset","volume-title":"Proc. Int. Conf. Social Media Soc. (SMSociety)","author":"Abu Farha"},{"key":"ref59","first-page":"3645","article-title":"Shami: A corpus of levantine Arabic dialects","volume-title":"Proc. 11th Int. Conf. Lang. Resour. Eval.","author":"Kwaik"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-96-0576-7_30"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/BF02365362"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11184831.pdf?arnumber=11184831","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T17:39:08Z","timestamp":1759945148000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11184831\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":61,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3616181","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2025]]}}}