{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T01:10:26Z","timestamp":1763341826402,"version":"3.45.0"},"reference-count":52,"publisher":"Tech Science Press","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.059870","type":"journal-article","created":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T05:44:36Z","timestamp":1743572676000},"page":"3471-3491","source":"Crossref","is-referenced-by-count":1,"title":["Leveraging Unlabeled Corpus for Arabic Dialect Identification"],"prefix":"10.32604","volume":"83","author":[{"given":"Mohammed","family":"Abdelmajeed","sequence":"first","affiliation":[]},{"given":"Jiangbin","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"Ahmed","family":"Murtadha","sequence":"additional","affiliation":[]},{"given":"Youcef","family":"Nafa","sequence":"additional","affiliation":[]},{"given":"Mohammed","family":"Abaker","sequence":"additional","affiliation":[]},{"given":"Muhammad Pervez","family":"Akhter","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"year":"2018 Aug","author":"Salameh","journal-title":"Fine-grained arabic dialect identification","key":"ref1"},{"year":"2019 Aug","author":"Bouamor","journal-title":"The MADAR shared task on arabic fine-grained dialect identification","key":"ref2"},{"year":"2018 May","author":"Abdul-Mageed","journal-title":"You tweet what you speak: a city-level dataset of arabic dialects","key":"ref3"},{"year":"2013 Aug","author":"Elfardy","journal-title":"Sentence level dialect identification in Arabic","key":"ref4"},{"year":"2016 Dec","author":"Ionescu","journal-title":"UnibucKernel: an approach for Arabic dialect identification based on multiple string 
kernels","key":"ref5"},{"year":"2016","author":"Malmasi","journal-title":"Computational linguistics","key":"ref6"},{"year":"2019 Aug","author":"Elaraby","journal-title":"A character level convolutional BiLSTM for Arabic dialect identification","key":"ref7"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1016\/j.procs.2017.10.117","article-title":"AraVec: a set of Arabic word embedding models for use in Arabic NLP","volume":"117","author":"Soliman","year":"2017","journal-title":"Procedia Comput Sci"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"6115","DOI":"10.1007\/s00521-022-07944-5","article-title":"A transformer fine-tuning strategy for text dialect identification","volume":"35","author":"Humayun","year":"2023","journal-title":"Neural Comput Appl"},{"year":"2020 Dec","author":"Mansour","journal-title":"Arabic dialect identification using BERT fine-tuning","key":"ref10"},{"year":"2022 Dec","author":"Attieh","journal-title":"Arabic dialect identification and sentiment classification using transformer-based models","key":"ref11"},{"key":"ref12","first-page":"4171","author":"Devlin","year":"2019","journal-title":"BERT: pre-training of deep bidirectional transformers for language understanding"},{"year":"2020 Jul","author":"Conneau","journal-title":"Unsupervised cross-lingual representation learning at scale","key":"ref13"},{"year":"2022 May","author":"Feng","journal-title":"Language-agnostic BERT sentence embedding","key":"ref14"},{"key":"ref15","first-page":"9","author":"Antoun","year":"2020 May","journal-title":"AraBERT: transformer-based model for Arabic language understanding"},{"year":"2020","author":"Dadas","journal-title":"Artificial intelligence and soft computing","key":"ref16"},{"unstructured":"Vries  WD, Van Cranenburgh A, Bisazza A, Caselli T, Noord GV, Nissim MJA. BERTje: a dutch BERT Model. arXiv:1912.09582. 
2019.","key":"ref17"},{"unstructured":"Virtanen A, Kanerva J, Ilo R, Luoma J, Luotolahti J, Salakoski T, et al. Multilingual is not enough: BERT for finnish. arXiv:1912.07076. 2019.","key":"ref18"},{"key":"ref19","series-title":"Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics","first-page":"7088","article-title":"ARBERT & MARBERT: deep bidirectional transformers for Arabic","author":"Abdul-Mageed","year":"2021"},{"year":"2021 Apr","author":"Inoue","journal-title":"The interplay of variant, size, and task type in Arabic pre-trained language models","key":"ref20"},{"key":"ref21","series-title":"International Committee for Computational Linguistics","first-page":"2054","article-title":"KUISAIL at SemEval-2020 Task 12: BERT-CNN for offensive speech identification in social media","author":"Safaya","year":"2020"},{"doi-asserted-by":"crossref","unstructured":"Lan W, Chen Y, Xu W, Ritter A. An empirical study of pre-trained transformers for Arabic information extraction. Online: Association for Computational Linguistics; 2020. p. 4727\u201334.","key":"ref22","DOI":"10.18653\/v1\/2020.emnlp-main.382"},{"unstructured":"Talafha B, Ali M, Za\u2019ter ME, Seelawi H, Tuffaha I, Samir M, et al. Multi-dialect Arabic BERT for country-level dialect identification arXiv:2007.05612. 
2020.","key":"ref23"},{"key":"ref24","series-title":"Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)","first-page":"452","article-title":"Dialect & sentiment identification in nuanced Arabic tweets using an ensemble of prompt-based, fine-tuned, and multitask BERT-based models","author":"Abdel-Salam","year":"2022"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"101488","DOI":"10.1016\/j.csl.2023.101488","article-title":"A three-stage neural model for Arabic dialect identification","volume":"80","author":"Mohammed","year":"2023","journal-title":"Comput Speech Lang"},{"doi-asserted-by":"crossref","unstructured":"Garg S, Ramakrishnan G. BAE: BERT-based adversarial examples for text classification. Online: Association for Computational Linguistics; 2020. p. 6174\u201381.","key":"ref26","DOI":"10.18653\/v1\/2020.emnlp-main.498"},{"doi-asserted-by":"crossref","unstructured":"Croce D, Castellucci G, Basili R. GAN-BERT: generative adversarial learning for robust text classification with a bunch of labeled examples. Online: Association for Computational Linguistics; 2020. p. 2114\u20139.","key":"ref27","DOI":"10.18653\/v1\/2020.acl-main.191"},{"key":"ref28","first-page":"2672","article-title":"Generative adversarial nets","volume":"2","author":"Goodfellow","year":"2014","journal-title":"NIPS\u201914: Proc 28th Int Conf Neural Inform Process Syst"},{"doi-asserted-by":"crossref","unstructured":"Hendrycks D, Liu X, Wallace E, Dziedzic A, Krishnan R, Song D. Pretrained transformers improve out-of-distribution robustness. Online: Association for Computational Linguistics; 2020. p. 
2744\u201351.","key":"ref29","DOI":"10.18653\/v1\/2020.acl-main.244"},{"key":"ref30","series-title":"Proceedings of the 28th International Conference on Computational Linguistics","first-page":"6838","article-title":"Neural unsupervised domain adaptation in NLP\u2014a survey","author":"Ramponi","year":"2020"},{"doi-asserted-by":"crossref","unstructured":"Vu T-T, Phung D, Haffari G. Effective unsupervised domain adaptation with adversarially trained language models. Online: Association for Computational Linguistics; 2020. p. 6163\u201373.","key":"ref31","DOI":"10.18653\/v1\/2020.emnlp-main.497"},{"doi-asserted-by":"crossref","unstructured":"Ye H, Tan Q, He R, Li J, Ng HT, Bing LJA. Feature adaptation of pre-trained language models across languages and domains for text classification. arXiv:2009.11538. 2020.","key":"ref32","DOI":"10.18653\/v1\/2020.emnlp-main.599"},{"key":"ref33","series-title":"Proceedings of the Second Workshop on Domain Adaptation for NLP","first-page":"9","article-title":"Pseudo-label guided unsupervised domain adaptation of contextual embeddings","author":"Chen","year":"2021"},{"key":"ref34","series-title":"Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing","first-page":"196","article-title":"Arabic dialect identification with a few labeled examples using generative adversarial networks","author":"Yusuf","year":"2020"},{"key":"ref35","series-title":"Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","first-page":"2824","article-title":"Domain adaptation for arabic cross-domain and cross-dialect sentiment analysis from contextualized word embedding","author":"El Mekki","year":"2021"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"102964","DOI":"10.1016\/j.ipm.2022.102964","article-title":"AdaSL: an unsupervised 
domain adaptation framework for Arabic multi-dialectal sequence labeling","volume":"59","author":"El Mekki","year":"2022","journal-title":"Inform Process Manage"},{"key":"ref37","series-title":"Proceedings of the Second Arabic Natural Language Processing Conference","first-page":"709","article-title":"NADI 2024: the fifth nuanced arabic dialect identification shared task","author":"Abdul-Mageed","year":"2024 Aug"},{"key":"ref38","series-title":"Proceedings of the Second Arabic Natural Language Processing Conference","first-page":"758","article-title":"ELYADATA at NADI, 2024 shared task: Arabic dialect identification with similarity-induced mono-to-multi label transformation","author":"Karoui","year":"2024 Aug"},{"key":"ref39","series-title":"Proceedings of the Second Arabic Natural Language Processing Conference","first-page":"153","article-title":"AlclaM: arabic dialect language model","author":"Ahmed","year":"2024 Aug"},{"year":"2024","author":"Alahmari","journal-title":"Intelligent computing","key":"ref40"},{"key":"ref41","doi-asserted-by":"crossref","first-page":"907","DOI":"10.32604\/iasc.2024.055470","article-title":"Arabic dialect identification in social media: a comparative study of deep learning and transformer approaches","volume":"39","author":"Alqulaity","year":"2024","journal-title":"Intell Autom Soft Comput"},{"key":"ref42","series-title":"Proceedings of the EACL 2009 Workshop on Computational Approaches to Semitic Languages","first-page":"53","article-title":"Spoken Arabic dialect identification using phonotactic modeling","author":"Biadsy","year":"2009 Mar"},{"key":"ref43","series-title":"18th International Conference on Computational Linguistics and Intelligent Text Processing (CICLING)","article-title":"Creating parallel Arabic dialect corpus: pitfalls to avoid","author":"Harrat","year":"2017"},{"unstructured":"Habash N, Eryani F, Khalifa S, Rambow O, Abdulrahim D, Erdmann  A, et al. 
Unified guidelines and resources for Arabic dialect orthography. Miyazaki, Japan: European Language Resources Association (ELRA); 2018.","key":"ref44"},{"key":"ref45","series-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","first-page":"1173","article-title":"Commonsense knowledge mining from pretrained models","author":"Davison","year":"2019 Nov"},{"key":"ref46","series-title":"Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019)","first-page":"7","article-title":"To tune or not to tune? Adapting pretrained representations to diverse tasks","author":"Peters","year":"2019"},{"key":"ref47","series-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics","first-page":"4019","article-title":"Adversarial and domain-aware BERT for cross-domain sentiment analysis","author":"Du","year":"2020 Jul"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1007\/s10994-009-5152-4","article-title":"A theory of learning from different domains","volume":"79","author":"Ben-David","year":"2010","journal-title":"Mach Learn"},{"key":"ref49","series-title":"Proceedings of the 32nd International Conference on Machine Learning","first-page":"1180","article-title":"Unsupervised domain adaptation by backpropagation","volume":"37","author":"Ganin","year":"2015"},{"key":"ref50","series-title":"Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)","article-title":"The MADAR Arabic dialect corpus and lexicon","author":"Bouamor","year":"2018 May"},{"key":"ref51","series-title":"Proceedings of the Fifth Arabic Natural Language Processing Workshop","first-page":"97","article-title":"NADI 2020: the first nuanced arabic dialect identification shared task","author":"Abdul-Mageed","year":"2020 
Dec"},{"key":"ref52","series-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","first-page":"6000","article-title":"Attention is all you need","author":"Vaswani","year":"2017"}],"container-title":["Computers, Materials & Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-83-2\/TSP_CMC_59870\/TSP_CMC_59870.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T01:07:19Z","timestamp":1763341639000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v83n2\/60521"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":52,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.059870","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]}}}