{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T13:29:29Z","timestamp":1760707769423,"version":"3.41.0"},"reference-count":5,"publisher":"Association for Computing Machinery (ACM)","issue":"2","license":[{"start":{"date-parts":[[2004,12,1]],"date-time":"2004-12-01T00:00:00Z","timestamp":1101859200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["SIGIR Forum"],"published-print":{"date-parts":[[2004,12]]},"abstract":"<jats:p>A large scale collection of web pages has been essential for research in information retrieval and related areas. This paper provides an overview of a large web collection used in the SPIRIT project for the design and testing of spatially-aware retrieval systems. Several statistics are derived and presented to show the characteristics of the collection.<\/jats:p>","DOI":"10.1145\/1041394.1041395","type":"journal-article","created":{"date-parts":[[2007,1,17]],"date-time":"2007-01-17T18:32:02Z","timestamp":1169058722000},"page":"57-61","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":15,"title":["The SPIRIT collection"],"prefix":"10.1145","volume":"38","author":[{"given":"Hideo","family":"Joho","sequence":"first","affiliation":[{"name":"University of Sheffield"}]},{"given":"Mark","family":"Sanderson","sequence":"additional","affiliation":[{"name":"University of Sheffield"}]}],"member":"320","published-online":{"date-parts":[[2004,12]]},"reference":[{"key":"e_1_2_1_1_1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"394","DOI":"10.1007\/978-3-540-24752-4_29","volume-title":"McDonald, S. & Tait, J. (eds.), Advances in Information Retrieval, Proceedings of the 26th European Conference on IR Research","author":"Cacheda F.","year":"2004"},{"key":"e_1_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/564376.564448"},{"key":"e_1_2_1_3_1","first-page":"78","volume-title":"Overview of the TREC 2003 Web Track","author":"Craswell N.","year":"2003"},{"key":"e_1_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:INRT.0000011206.23588.ab"},{"key":"e_1_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/564376.564457"}],"container-title":["ACM SIGIR Forum"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1041394.1041395","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1041394.1041395","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T17:24:01Z","timestamp":1750267441000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1041394.1041395"}},"subtitle":["an overview of a large web collection"],"short-title":[],"issued":{"date-parts":[[2004,12]]},"references-count":5,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2004,12]]}},"alternative-id":["10.1145\/1041394.1041395"],"URL":"https:\/\/doi.org\/10.1145\/1041394.1041395","relation":{},"ISSN":["0163-5840"],"issn-type":[{"type":"print","value":"0163-5840"}],"subject":[],"published":{"date-parts":[[2004,12]]},"assertion":[{"value":"2004-12-01","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}