{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T22:11:45Z","timestamp":1766441505762,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"UKRI EPSRC","award":["EP\/Y528651\/1"],"award-info":[{"award-number":["EP\/Y528651\/1"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,29]]},"DOI":"10.1145\/3678717.3691286","type":"proceedings-article","created":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T06:29:21Z","timestamp":1732256961000},"page":"585-588","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Quantifying Geospatial in the Common Crawl Corpus"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7082-7122","authenticated-orcid":false,"given":"Ilya","family":"Ilyankou","sequence":"first","affiliation":[{"name":"UCL SpaceTimeLab, London, UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8420-2141","authenticated-orcid":false,"given":"Meihui","family":"Wang","sequence":"additional","affiliation":[{"name":"UCL SpaceTimeLab, London, UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3575-0365","authenticated-orcid":false,"given":"Stefano","family":"Cavazzi","sequence":"additional","affiliation":[{"name":"Ordnance Survey, Southampton, UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9506-4266","authenticated-orcid":false,"given":"James","family":"Haworth","sequence":"additional","affiliation":[{"name":"UCL SpaceTimeLab, London, UK"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Organizational Research: Determining Appropriate Sample Size in Survey Research.","author":"Bartlett James E","year":"2001","unstructured":"James E Bartlett, Joe W Kotrlik, and Chadwick C Higgins. 2001. Organizational Research: Determining Appropriate Sample Size in Survey Research. (2001)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589132.3625625"},{"key":"e_1_3_2_1_3_1","volume-title":"Vinay Uday Prabhu, and Emmanuel Kahembwe","author":"Birhane Abeba","year":"2021","unstructured":"Abeba Birhane, Vinay Uday Prabhu, and Emmanuel Kahembwe. 2021. Multimodal datasets: misogyny, pornography, and malignant stereotypes. http:\/\/arxiv.org\/abs\/2110.01963 arXiv:2110.01963 [cs]."},{"volume-title":"Manual of Geospatial Science and Technology","author":"Bossler John","key":"e_1_3_2_1_4_1","unstructured":"John Bossler. 2010. Manual of Geospatial Science and Technology. CRC Press. Google-Books-ID: UdZ3uDekqwwC."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/WI-IAT.2010.64"},{"key":"e_1_3_2_1_6_1","unstructured":"Common Crawl. 2024. Statistics of Common Crawl Monthly Archives by commoncrawl. https:\/\/commoncrawl.github.io\/cc-crawl-statistics\/plots\/languages"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Jesse Dodge Maarten Sap Ana Marasovi\u0107 William Agnew Gabriel Ilharco Dirk Groeneveld Margaret Mitchell and Matt Gardner. 2021. Documenting Large Webtext Corpora: A Case Study on the Colossal Clean Crawled Corpus. http:\/\/arxiv.org\/abs\/2104.08758 arXiv:2104.08758 [cs].","DOI":"10.18653\/v1\/2021.emnlp-main.98"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-95786-9_22"},{"key":"e_1_3_2_1_9_1","unstructured":"Nir Fulman Abdulkadir Memduho\u011flu and Alexander Zipf. 2024. Evidence for Systematic Bias in the Spatial Memory of Large Language Models. (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","unstructured":"Wes Gurnee and Max Tegmark. 2024. Language Models Represent Space and Time. https:\/\/doi.org\/10.48550\/arXiv.2310.02207 arXiv:2310.02207 [cs].","DOI":"10.48550\/arXiv.2310.02207"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00447"},{"key":"e_1_3_2_1_12_1","volume-title":"Viviano","author":"Luccioni Alexandra Sasha","year":"2021","unstructured":"Alexandra Sasha Luccioni and Joseph D. Viviano. 2021. What's in the Box? A Preliminary Analysis of Undesirable Content in the Common Crawl Corpus. http:\/\/arxiv.org\/abs\/2105.02732 arXiv:2105.02732 [cs]."},{"key":"e_1_3_2_1_13_1","unstructured":"Gengchen Mai Weiming Huang Jin Sun Suhang Song Deepak Mishra Ninghao Liu Song Gao Tianming Liu Gao Cong Yingjie Hu Chris Cundy Ziyuan Li Rui Zhu and Ni Lao. 2023. On the Opportunities and Challenges of Foundation Models for Geospatial Artificial Intelligence. http:\/\/arxiv.org\/abs\/2304.06798 arXiv:2304.06798 [cs]."},{"key":"e_1_3_2_1_14_1","unstructured":"Rohin Manvi Samar Khanna Marshall Burke David Lobell and Stefano Ermon. 2024. Large Language Models are Geographically Biased. http:\/\/arxiv.org\/abs\/2402.02680 arXiv:2402.02680 [cs]."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Mazda Moayeri and Soheil Feizi. 2024. WorldBench: Quantifying Geographic Disparities in LLM Factual Recall. https:\/\/openreview.net\/forum?id=fubvUIBggI","DOI":"10.1145\/3630106.3658967"},{"key":"e_1_3_2_1_16_1","unstructured":"Hannes M\u00fchleisen and Christian Bizer. 2012. Web Data Commons - Extracting Structured Data from Two Large Web Corpora. (2012)."},{"key":"e_1_3_2_1_17_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans and Ilya Sutskever. 2018. Improving Language Understanding by Generative Pre-Training. (2018)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2494188.2494193"},{"key":"e_1_3_2_1_19_1","unstructured":"Gemini Team. 2024. Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv:2403.05530 [cs.CL]"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2567948.2576962"},{"key":"e_1_3_2_1_21_1","volume-title":"Brian Lester, Nan Du, Andrew M. Dai, and Quoc V. Le.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Maarten Bosma, Vincent Y. Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M. Dai, and Quoc V. Le. 2022. Finetuned Language Models Are Zero-Shot Learners. http:\/\/arxiv.org\/abs\/2109.01652 arXiv:2109.01652 [cs]."},{"key":"e_1_3_2_1_22_1","unstructured":"Tingyu Xie Qi Li Jian Zhang Yan Zhang Zuozhu Liu and Hongwei Wang. 2023. Empirical Study of Zero-Shot NER with ChatGPT. (2023)."},{"key":"e_1_3_2_1_23_1","unstructured":"Wenxuan Zhou Sheng Zhang Yu Gu Muhao Chen and Hoifung Poon. 2024. UniversalNER: Targeted Distillation from Large Language Models for Open Named Entity Recognition. http:\/\/arxiv.org\/abs\/2308.03279"}],"event":{"name":"SIGSPATIAL '24: The 32nd ACM International Conference on Advances in Geographic Information Systems","sponsor":["SIGSPATIAL ACM Special Interest Group on Spatial Information"],"location":"Atlanta GA USA","acronym":"SIGSPATIAL '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Advances in Geographic Information Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678717.3691286","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3678717.3691286","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T10:40:21Z","timestamp":1755859221000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678717.3691286"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"references-count":23,"alternative-id":["10.1145\/3678717.3691286","10.1145\/3678717"],"URL":"https:\/\/doi.org\/10.1145\/3678717.3691286","relation":{},"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"2024-11-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}