{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T13:17:02Z","timestamp":1780060622870,"version":"3.54.0"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031560682","type":"print"},{"value":"9783031560699","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-56069-9_10","type":"book-chapter","created":{"date-parts":[[2024,3,22]],"date-time":"2024-03-22T08:17:45Z","timestamp":1711095465000},"page":"130-143","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["The Open Web Index"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0945-3148","authenticated-orcid":false,"given":"Gijs","family":"Hendriksen","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1747-5643","authenticated-orcid":false,"given":"Michael","family":"Dinzinger","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4242-2458","authenticated-orcid":false,"given":"Sheikh Mastura","family":"Farzana","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9707-6453","authenticated-orcid":false,"given":"Noor Afshan","family":"Fathima","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1003-981X","authenticated-orcid":false,"given":"Maik","family":"Fr\u00f6be","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7701-3294","authenticated-orcid":false,"given":"Sebastian","family":"Schmidt","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2259-0462","authenticated-orcid":false,"given":"Saber","family":"Zerhoudi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3566-5507","authenticated-orcid":false,"given":"Michael","family":"Granitzer","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9733-2890","authenticated-orcid":false,"given":"Matthias","family":"Hagen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4967-2900","authenticated-orcid":false,"given":"Djoerd","family":"Hiemstra","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2451-0665","authenticated-orcid":false,"given":"Martin","family":"Potthast","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9033-2217","authenticated-orcid":false,"given":"Benno","family":"Stein","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,3,23]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Formal, T., Piwowarski, B., Clinchant, S.: SPLADE: Sparse Lexical and Expansion Model for First Stage Ranking, SIGIR 2021, pp. 2288-2292. Association for Computing Machinery, New York (2021), ISBN 9781450380379","DOI":"10.1145\/3404835.3463098"},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Fr\u00f6be, M., et al.: The Information Retrieval Experiment Platform. In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval (2023)","DOI":"10.1145\/3539618.3591888"},{"key":"10_CR3","doi-asserted-by":"publisher","unstructured":"Fr\u00f6be, M., et al.: Continuous integration for reproducible shared tasks with TIRA.io. In: Advances in Information Retrieval. 45th European Conference on IR Research (ECIR 2023). LNCS. Springer (2023). https:\/\/doi.org\/10.1007\/978-3-031-28241-6_20","DOI":"10.1007\/978-3-031-28241-6_20"},{"key":"10_CR4","unstructured":"Gao, L., et al.: The Pile: An 800GB Dataset of Diverse Text for Language Modeling (Dec 2020)"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Goel, S., Broder, A.Z., Gabrilovich, E., Pang, B.: Anatomy of the long tail: ordinary people with extraordinary tastes. In: Davison, B.D., Suel, T., Craswell, N., Liu, B. (eds.) Proceedings of the Third International Conference on Web Search and Web Data Mining, WSDM 2010, 4-6 February 2010, pp. 201\u2013210. ACM, New York (2010)","DOI":"10.1145\/1718487.1718513"},{"key":"10_CR6","unstructured":"Gollub, T., Potthast, M., Stein, B.: Shaping the Information Nutrition Label. In: Albakour, D., Corney, D., Gonzalo, J., Martinez, M., Poblete, B., Valochas, A. (eds.) 2nd International Workshop on Recent Trends in News Information Retrieval (NewsIR 2018) at ECIR. CEUR Workshop Proceedings, vol. 2079, pp. 9\u201311 (Mar 2018), ISSN 1613-0073"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Granitzer, M., Voigt, S., et\u00a0al.: Impact and Development of an Open Web Index for Open Web Search. J. Assoc. Inform. Sci. Technol. (2023)","DOI":"10.1002\/asi.24818"},{"key":"10_CR8","doi-asserted-by":"crossref","unstructured":"Guha, R.V., Brickley, D., MacBeth, S.: Schema.org: evolution of structured data on the web: big data makes common schemas even more necessary. Queue 13(9), 10\u201337 (2015), ISSN 1542-7730","DOI":"10.1145\/2857274.2857276"},{"key":"10_CR9","unstructured":"Kamphuis, C., Hasibi, F., Lin, J., de\u00a0Vries, A.P.: REBL: entity linking at scale. In: Alonso, O., Baeza-Yates, R., King, T.H., Silvello, G. (eds.) Proceedings of the Third International Conference on Design of Experimental Search & Information Retrieval Systems, San Jose, CA, USA, 30-31 August 2022. CEUR Workshop Proceedings, vol. 3480, pp. 68\u201375. CEUR-WS.org (2022)"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Khattab, O., Zaharia, M.: ColBERT: efficient and effective passage search via contextualized late interaction over BERT. In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2020, pp. 39-48. Association for Computing Machinery, New York (2020), ISBN 9781450380164","DOI":"10.1145\/3397271.3401075"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Koster, M., Illyes, G., Zeller, H., Sassman, L.: RFC 9309 Robots Exclusion Protocol (2022)","DOI":"10.17487\/RFC9309"},{"key":"10_CR12","unstructured":"Kreutzer, J., et\u00a0al.: Quality at a Glance: An Audit of Web-Crawled Multilingual Datasets (2021)"},{"issue":"4","key":"10_CR13","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1145\/3312479","volume":"62","author":"D Lewandowski","year":"2019","unstructured":"Lewandowski, D.: The web is missing an essential part of infrastructure: an open web index. Commun. ACM 62(4), 24 (2019)","journal-title":"Commun. ACM"},{"key":"10_CR14","unstructured":"Li, H., Su, Y., Cai, D., Wang, Y., Liu, L.: A Survey on Retrieval-Augmented Text Generation. arXiv preprint arXiv:2202.01110 (2022)"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Lin, J., et al.: Supporting interoperability between open-source search engines with the common index file format. In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2149\u20132152 (2020)","DOI":"10.1145\/3397271.3401404"},{"key":"10_CR16","doi-asserted-by":"publisher","unstructured":"Lugeon, S., Piccardi, T.: Curlie Dataset - Language-agnostic Website Embedding and Classification (Jan 2023). https:\/\/doi.org\/10.6084\/m9.figshare.19406693.v5, https:\/\/figshare.com\/articles\/dataset\/Curlie_Dataset_-_Language-agnostic_Website_Embedding_and_Classification\/19406693","DOI":"10.6084\/m9.figshare.19406693.v5"},{"issue":"4","key":"10_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3202662","volume":"36","author":"SE Middleton","year":"2018","unstructured":"Middleton, S.E., Kordopatis-Zilos, G., Papadopoulos, S., Kompatsiaris, Y.: Location extraction from social media: geoparsing, location disambiguation, and geotagging. ACM Trans. Inform. Syst. (TOIS) 36(4), 1\u201327 (2018)","journal-title":"ACM Trans. Inform. Syst. (TOIS)"},{"key":"10_CR18","unstructured":"M\u00fchleisen, H., Bizer, C.: Web data commons - extracting structured data from two large web corpora. In: Bizer, C., Heath, T., Berners-Lee, T., Hausenblas, M. (eds.) WWW 2012 Workshop on Linked Data on the Web, Lyon, France, 16 April 2012. CEUR Workshop Proceedings, vol. 937. CEUR-WS.org (2012)"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Overwijk, A., Xiong, C., Liu, X., VandenBerg, C., Callan, J.: ClueWeb22: 10 Billion Web Documents with Visual and Semantic Information (Dec 2022)","DOI":"10.1145\/3477495.3536321"},{"key":"10_CR20","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21, 140:1\u2013140:67 (2020)"},{"key":"10_CR21","unstructured":"Scao, T.L., et\u00a0al.: BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. CoRR arXiv: 2211.05100 (2022)"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Scells, H., Zhuang, S., Zuccon, G.: Reduce, reuse, recycle: green information retrieval research. In: Amig\u00f3, E., Castells, P., Gonzalo, J., Carterette, B., Culpepper, J.S., Kazai, G. (eds.) SIGIR 2022: The 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, Madrid, Spain, 11 - 15 July 2022, pp. 2825\u20132837. ACM (2022)","DOI":"10.1145\/3477495.3531766"},{"key":"10_CR23","unstructured":"Touvron, H., et al.: LLaMA: Open and Efficient Foundation Language Models. CoRR arXiv: 2302.13971 (2023)"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"van Hulst, J.M., Hasibi, F., Dercksen, K., Balog, K., de Vries, A.P.: REL: an entity linker standing on the shoulders of giants. In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2197\u20132200. ACM, Virtual Event China (Jul 2020), ISBN 978-1-4503-8016-4","DOI":"10.1145\/3397271.3401416"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Wiegmann, M., Wolska, M., Schr\u00f6der, C., Borchardt, O., Stein, B., Potthast, M.: Trigger warning assignment as a multi-label document classification problem. In: Rogers, A., Boyd-Graber, J., Okazaki, N. (eds.) Proceedings of the 61th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 12113\u201312134. Association for Computational Linguistics, Toronto, Canada (Jul 2023)","DOI":"10.18653\/v1\/2023.acl-long.676"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-56069-9_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,22]],"date-time":"2024-03-22T08:21:55Z","timestamp":1711095715000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-56069-9_10"}},"subtitle":["Crawling and Indexing the Web for Public Use"],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031560682","9783031560699"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-56069-9_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"23 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECIR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Information Retrieval","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 March 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 March 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecir2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.ecir2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"578","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"110","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"69","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31 (Tracks: Workshop, Tutorial, Industry, Doctoral Consortium)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}