{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T08:19:52Z","timestamp":1774945192205,"version":"3.50.1"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Deputyship for Research and Innovation, Ministry of Education, Saudi Arabia","award":["DRI-KSU-1292"],"award-info":[{"award-number":["DRI-KSU-1292"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/access.2021.3089924","type":"journal-article","created":{"date-parts":[[2021,6,17]],"date-time":"2021-06-17T19:41:56Z","timestamp":1623958916000},"page":"88405-88428","source":"Crossref","is-referenced-by-count":11,"title":["An Incremental Approach to Corpus Design and Construction: Application to a Large Contemporary Saudi Corpus"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3764-6169","authenticated-orcid":false,"given":"Hebah","family":"Elgibreen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7720-0076","authenticated-orcid":false,"given":"Mohammed","family":"Faisal","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2866-184X","authenticated-orcid":false,"given":"Mansour Al","family":"Sulaiman","sequence":"additional","affiliation":[]},{"given":"Sherif","family":"Abdou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9478-9206","authenticated-orcid":false,"given":"Mohamed Amine","family":"Mekhtiche","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7556-9267","authenticated-orcid":false,"given":"Abdullah M.","family":"Moussa","sequence":"additional","affiliation":[]},{"given":"Yousef A.","family":"Alohali","sequence":"additional","affiliation":[]},{"given":"Wadood","family":"Abdul","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9781-3969","authenticated-orcid":false,"given":"Ghulam","family":"Muhammad","sequence":"additional","affiliation":[]},{"given":"Mohsen","family":"Rashwan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7962-8121","authenticated-orcid":false,"given":"Mohammed","family":"Algabri","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1142\/S0219649220400183"},{"key":"ref38","first-page":"1318","article-title":"Habibi&#x2014;A multi dialect multi national Arabic song lyrics corpus","author":"el-haj","year":"2020","journal-title":"Proc 12th Lang Resour Eval Conf"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.dib.2017.01.011"},{"key":"ref32","article-title":"Twitter4j&#x2014;A java library for the Twitter API","author":"yamamoto","year":"2014"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2017.10.118"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67056-0_3"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.dib.2019.104076"},{"key":"ref36","first-page":"3387","article-title":"The MADAR Arabic dialect corpus and lexicon","author":"bouamor","year":"2019","journal-title":"Proc 11th Int Conf Lang Resour Eval (LREC)"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/PAIS.2018.8598524"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/AICCSA.2017.22"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-19807-7_29"},{"key":"ref62","year":"0","journal-title":"Copyright Law Issued by Royal Decree No M\/41 Dated 2\/7\/1424 AH Amended by the Council of Ministers Resolution No (536) Dated 19\/10\/1439 AH"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3049734"},{"key":"ref28","first-page":"272","article-title":"Saudi Twitter corpus for sentiment analysis","volume":"10","author":"assiri","year":"2016","journal-title":"Int J Comput Inf Eng"},{"key":"ref27","first-page":"3839","article-title":"A morphologically annotated corpus of Emirati Arabic","author":"khalifa","year":"2019","journal-title":"Proc 11th Int Conf Lang Resour Eval (LREC)"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/AICCSA.2016.7945800"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-014-9284-1"},{"key":"ref1","first-page":"1","article-title":"Abu El-Khair corpus: A modern standard Arabic corpus","volume":"2","author":"el-khair","year":"2016","journal-title":"Int J Recent Trends Eng Res"},{"key":"ref20","first-page":"39","article-title":"A supervised POS tagger for written Arabic social networking corpora","volume":"5","author":"al-sabbagh","year":"2012","journal-title":"Proc 11th Conf Natural Lang Process KONVENS Empirical Methods Natural Lang Process"},{"key":"ref22","first-page":"1162","article-title":"SANA: A large scale multi-genre, multi-dialect lexicon for Arabic subjectivity and sentiment analysis","author":"abdul-mageed","year":"2014","journal-title":"Proc 9th Int Conf Lang Resour Eval (LREC)"},{"key":"ref21","first-page":"3907","article-title":"AWATIF: A multi-genre corpus for modern standard Arabic subjectivity and sentiment analysis","author":"abdul-mageed","year":"2012","journal-title":"Proc 8th Int Conf Lang Resour Eval (LREC)"},{"key":"ref24","article-title":"1.5 Billion words Arabic corpus","author":"el-khair","year":"2016","journal-title":"arXiv 1611 04033"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3613"},{"key":"ref26","first-page":"4282","article-title":"A large scale corpus of Gulf Arabic","author":"khalifa","year":"2016","journal-title":"Proc 10th Int Conf Lang Resour Eval (LREC)"},{"key":"ref25","first-page":"5","article-title":"The design and construction of the 50 million words KSUCCA","author":"alrabiah","year":"2013","journal-title":"Proc 2nd Workshop Arabic Corpus Linguistics"},{"key":"ref50","first-page":"4134","article-title":"AraNLP: A java-based library for the processing of Arabic text","author":"althobaiti","year":"2014","journal-title":"Proc 9th Int Conf Lang Resour Eval (LREC)"},{"key":"ref51","first-page":"230","article-title":"A study of text preprocessing tools for Arabic text categorization","author":"said","year":"2009","journal-title":"Proc 2nd Int Conf Arabic Lang Resour Tools"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.5909"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-27355-2_6"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.17694\/bajece.419538"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1002\/cae.22253"},{"key":"ref55","first-page":"101","article-title":"LDA-based topic modelling in text sentiment classification: An empirical analysis","volume":"7","author":"onan","year":"2016","journal-title":"Int J Computat Linguistics Appl"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCISci.2019.8716408"},{"key":"ref53","article-title":"Probabilistic methods for searching OCR-degraded Arabic text","author":"darwish","year":"2004"},{"key":"ref52","article-title":"A large-scale computational processor of the Arabic morphology, and applications","author":"attia","year":"2000"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2018.10.462"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3601"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.3115\/1220175.1220261"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-16469-0_10"},{"key":"ref13","first-page":"102","article-title":"The Penn Arabic treebank: Building a large-scale annotated Arabic corpus","author":"maamouri","year":"2016","journal-title":"Proc NEMLAR Conf Arabic Lang Resour Tools"},{"key":"ref14","first-page":"66","article-title":"COLABA: Arabic dialect annotation and processing","author":"diab","year":"2016","journal-title":"Proc LREC Workshop Semitic Lang Process"},{"key":"ref15","article-title":"OSAC: Open source Arabic corpora","volume":"10","author":"saad","year":"2010"},{"key":"ref16","first-page":"2214","article-title":"Parallel data, tools and interfaces in OPUS","author":"tiedemann","year":"2012"},{"key":"ref17","article-title":"MultiUN: A multilingual corpus from united nation documents","author":"eisele","year":"0"},{"key":"ref18","first-page":"923","article-title":"OpenSubtitles2016: Extracting large parallel corpora from movie and TV subtitles","author":"lison","year":"2016","journal-title":"Proc 10th Int Conf Lang Resour Eval (LREC)"},{"key":"ref19","first-page":"2882","article-title":"YADAC: Yet another dialectal Arabic corpus","author":"al-sabbagh","year":"2012","journal-title":"Proc 8th Int Conf Lang Resour Eval (LREC)"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.34028\/iajit\/17\/3\/10"},{"key":"ref3","year":"0","journal-title":"Official Languages"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2017.10.094"},{"key":"ref5","first-page":"1","article-title":"A morphological analyzer for Egyptian Arabic","author":"habash","year":"2012","journal-title":"Proc 12th Meeting Special Interest Group Comput Morphol Phonol"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/1644879.1644881"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.2200\/S00277ED1V01Y201008HLT010"},{"key":"ref49","first-page":"7022","article-title":"CAMeL tools: An open source Python toolkit for Arabic natural language processing","author":"obeid","year":"2020","journal-title":"Proc 12th Lang Resour Eval Conf"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-4616"},{"key":"ref46","first-page":"62","article-title":"MADA+ TOKAN: A toolkit for Arabic tokenization, diacritization, morphological disambiguation, POS tagging, stemming and lemmatization","volume":"41","author":"habash","year":"2009","journal-title":"Proc 2nd Int Conf Arabic Lang Resour Tools (MEDAR)"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.3115\/1219840.1219911"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-3003"},{"key":"ref47","article-title":"Automated methods for processing Arabic text: From tokenization to base phrase chunking","author":"diab","year":"2007","journal-title":"Arabic Computational Morphology Knowledge-based and Empirical Methods"},{"key":"ref42","year":"2020","journal-title":"Graph API - Facebook Developers"},{"key":"ref41","year":"2020","journal-title":"CommentThreads YouTube Data API"},{"key":"ref44","first-page":"1094","article-title":"MADAMIRA: A fast, comprehensive tool for morphological analysis and disambiguation of Arabic","author":"pasha","year":"2014","journal-title":"Proc 9th Int Conf Lang Resour Eval (LREC)"},{"key":"ref43","year":"0","journal-title":"Find Your Facebook ID"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9312710\/09458313.pdf?arnumber=9458313","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,17]],"date-time":"2021-12-17T19:56:37Z","timestamp":1639770997000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9458313\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":62,"URL":"https:\/\/doi.org\/10.1109\/access.2021.3089924","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}