{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T13:09:25Z","timestamp":1771679365452,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":14,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,7,7]],"date-time":"2016-07-07T00:00:00Z","timestamp":1467849600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100008982","name":"Qatar National Research Fund","doi-asserted-by":"publisher","award":["NPRP 7-1313-1-245"],"award-info":[{"award-number":["NPRP 7-1313-1-245"]}],"id":[{"id":"10.13039\/100008982","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,7,7]]},"DOI":"10.1145\/2911451.2914677","type":"proceedings-article","created":{"date-parts":[[2016,7,8]],"date-time":"2016-07-08T15:03:00Z","timestamp":1467990180000},"page":"673-676","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["ArabicWeb16"],"prefix":"10.1145","author":[{"given":"Reem","family":"Suwaileh","sequence":"first","affiliation":[{"name":"Qatar University, Doha, Qatar"}]},{"given":"Mucahid","family":"Kutlu","sequence":"additional","affiliation":[{"name":"Qatar University, Doha, Qatar"}]},{"given":"Nihal","family":"Fathima","sequence":"additional","affiliation":[{"name":"Qatar University, Doha, Qatar"}]},{"given":"Tamer","family":"Elsayed","sequence":"additional","affiliation":[{"name":"Qatar University, Doha, Qatar"}]},{"given":"Matthew","family":"Lease","sequence":"additional","affiliation":[{"name":"University of Texas at Austin, Austin, USA"}]}],"member":"320","published-online":{"date-parts":[[2016,7,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0306-4573(02)00084-5"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the Language Resources and Evaluation Conf. (LREC)","author":"Baroni M.","year":"2004","unstructured":"M. Baroni and S. Bernardini. BootCaT: Bootstrapping Corpora and Terms from the Web. In Proceedings of the Language Resources and Evaluation Conf. (LREC), 2004."},{"key":"e_1_3_2_1_3_1","first-page":"1240","volume-title":"Proceedings of the Language Resources and Evaluation Conference (LREC)","author":"Bouamor H.","year":"2014","unstructured":"H. Bouamor, N. Habash, and K. Oflazer. A Multidialectal Parallel Corpus of Arabic. In Proceedings of the Language Resources and Evaluation Conference (LREC), pages 1240--1245, 2014."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10791-007-9032-x"},{"key":"e_1_3_2_1_5_1","volume-title":"Presentation","author":"Callan J.","year":"2009","unstructured":"J. Callan, M. Hoy, C. Yoo, and L. Zhao. The ClueWeb09 Dataset, 2009. Presentation Nov. 19, 2009 at NIST TREC. Slides online at boston.lti.cs.cmu.edu\/classes\/11-742\/S10-TREC\/TREC-Nov19-09.pdf."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/S1389-1286(99)00052-3"},{"key":"e_1_3_2_1_7_1","first-page":"687","volume-title":"Proceedings of the international conference on scientific information","volume":"1","author":"Cleverdon C. W.","year":"1959","unstructured":"C. W. Cleverdon. The evaluation of systems used in information retrieval. In Proceedings of the international conference on scientific information, volume 1, pages 687--698. National Academy of Sciences, 1959."},{"key":"e_1_3_2_1_8_1","first-page":"241","volume-title":"Multi-Genre Corpus of Informal Written Arabic. In Proceedings of the Language Resources and Evaluation Conference (LREC)","author":"Cotterell R.","year":"2014","unstructured":"R. Cotterell and C. Callison-Burch. A Multi-Dialect, Multi-Genre Corpus of Informal Written Arabic. In Proceedings of the Language Resources and Evaluation Conference (LREC), pages 241--245, 2014."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000031"},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. of the Tenth Text REtrieval Conference (TREC 10)","author":"Gey F. C.","year":"2001","unstructured":"F. C. Gey and D. W. Oard. The TREC-2001 Cross-Language Information Retrieval Track: Searching Arabic Using English, French or Arabic Queries. In Proc. of the Tenth Text REtrieval Conference (TREC 10), 2001."},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the Eighth Text REtrieval Conference (TREC 8)","author":"Hawking D.","year":"1999","unstructured":"D. Hawking, E. Voorhees, N. Craswell, and P. Bailey. Overview of the TREC-8 Web Track. In Proceedings of the Eighth Text REtrieval Conference (TREC 8), 1999."},{"key":"e_1_3_2_1_12_1","volume-title":"Exploratory analysis of a terabyte scale web corpus. arXiv preprint arXiv:1409.5443","author":"Kolias V.","year":"2014","unstructured":"V. Kolias, I. Anagnostopoulos, and E. Kayafas. Exploratory analysis of a terabyte scale web corpus. arXiv preprint arXiv:1409.5443, 2014."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_14_1","volume-title":"Language detection library for java","author":"Shuyo N.","year":"2010","unstructured":"N. Shuyo. Language detection library for java, 2010. http:\/\/code.google.com\/p\/language-detection\/."}],"event":{"name":"SIGIR '16: The 39th International ACM SIGIR conference on research and development in Information Retrieval","location":"Pisa Italy","acronym":"SIGIR '16","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 39th International ACM SIGIR conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2911451.2914677","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2911451.2914677","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:55:03Z","timestamp":1750222503000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2911451.2914677"}},"subtitle":["A New Crawl for Today's Arabic Web"],"short-title":[],"issued":{"date-parts":[[2016,7,7]]},"references-count":14,"alternative-id":["10.1145\/2911451.2914677","10.1145\/2911451"],"URL":"https:\/\/doi.org\/10.1145\/2911451.2914677","relation":{},"subject":[],"published":{"date-parts":[[2016,7,7]]},"assertion":[{"value":"2016-07-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}