{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T18:41:45Z","timestamp":1768588905539,"version":"3.49.0"},"publisher-location":"Berlin, Heidelberg","reference-count":167,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642209451","type":"print"},{"value":"9783642209468","type":"electronic"}],"license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-20946-8_2","type":"book-chapter","created":{"date-parts":[[2011,6,9]],"date-time":"2011-06-09T13:49:09Z","timestamp":1307627349000},"page":"27-50","source":"Crossref","is-referenced-by-count":24,"title":["Scalability Challenges in Web Search Engines"],"prefix":"10.1007","author":[{"given":"Berkant Barla","family":"Cambazoglu","sequence":"first","affiliation":[]},{"given":"Ricardo","family":"Baeza-Yates","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"2_CR28","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1145\/1498759.1498766","volume-title":"Proceedings of the ACM Conference on Web Search and Data Mining","author":"R Agrawal","year":"2009","unstructured":"Agrawal R, Gollapudi S, Halverson A, Ieong S (2009) Diversifying search results. In: Proceedings of the ACM Conference on Web Search and Data Mining. ACM Press, New York, NY, pp 5\u201314"},{"key":"2_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"628","DOI":"10.1007\/978-3-642-00958-7_59","volume-title":"Proceedings of the European Conference on Information Retrieval","author":"I Altingovde","year":"2009","unstructured":"Altingovde I, Ozcan R, Ulusoy O (2009) A cost-aware strategy for query result caching in web search engines. In: Boughanem M, Berrut C, Mothe J, Soule-Dupuy C (eds) Proceedings of the European Conference on Information Retrieval. Lecture Notes in Computer Science, vol 5478. Springer, Berlin\/Heidelberg, pp 628\u2013636"},{"key":"2_CR34","first-page":"61","volume-title":"Proceedings of the Australasian Database Conference","author":"V Anh","year":"2004","unstructured":"Anh V, Moffat A (2004) Index compression using fixed binary codewords. In: Proceedings of the Australasian Database Conference. Australian Computer Society and Inc, Darlinghurst, pp 61\u201367"},{"issue":"6","key":"2_CR35","doi-asserted-by":"publisher","first-page":"857","DOI":"10.1109\/TKDE.2006.99","volume":"18","author":"V Anh","year":"2006","unstructured":"Anh V, Moffat A (2006a) Improved word-aligned binary compression for text indexing. IEEE Transactions on Knowledge and Data Engineering 18(6):857\u2013861","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"2_CR36","first-page":"372","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"V Anh","year":"2006","unstructured":"Anh V, Moffat A (2006b) Pruned query evaluation using pre-computed impacts. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 372\u2013379"},{"key":"2_CR37","first-page":"35","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"V Anh","year":"2001","unstructured":"Anh V, de Kretser O, Moffat A (2001) Vector-space ranking with effective early termination. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 35\u201342"},{"issue":"1","key":"2_CR40","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1145\/383034.383035","volume":"1","author":"A Arasu","year":"2001","unstructured":"Arasu A, Cho J, Garcia-Molina H, Paepcke A, Raghavan S (2001) Searching the Web. ACM Transactions on Internet Technology 1(1):2\u201343","journal-title":"ACM Transactions on Internet Technology"},{"key":"2_CR49","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1109\/SPIRE.2001.989733","volume-title":"Proceedings of the International Symposium on String Processing and Information Retrieval","author":"C Badue","year":"2001","unstructured":"Badue C, Baeza-Yates R, Ribeiro-Neto BA, Ziviani N (2001) Distributed query processing using partitioned inverted files. In: Proceedings of the International Symposium on String Processing and Information Retrieval, pp 10\u201320"},{"issue":"3","key":"2_CR50","doi-asserted-by":"publisher","first-page":"592","DOI":"10.1016\/j.ipm.2006.09.002","volume":"43","author":"C Badue","year":"2007","unstructured":"Badue C, Baeza-Yates R, Ribeiro-Neto BA, Ziviani A, Ziviani N (2007) Analyzing imbalance among homogeneous index servers in a web search system. Information Processing and Management 43(3):592\u2013608","journal-title":"Information Processing and Management"},{"key":"2_CR52","volume-title":"Modern Information Retrieval","author":"R Baeza-Yates","year":"2010","unstructured":"Baeza-Yates R, Ribeiro-Neto B (2010) Modern Information Retrieval, 2nd edn. Addison-Wesley, Reading, MA","edition":"2"},{"key":"2_CR53","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1007\/978-3-540-39984-1_5","volume-title":"Proceedings of the International Symposium on String Processing Information Retrieval","author":"R Baeza-Yates","year":"2003","unstructured":"Baeza-Yates R, Saint-Jean F (2003) A three level search engine index based in query log distribution. In: Nascimento M, de Moura E, Oliveira A (eds) Proceedings of the International Symposium on String Processing Information Retrieval. Lecture Notes in Computer Science, vol 2857. Springer, Berlin\/Heidelberg, pp 56\u201365"},{"key":"2_CR54","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1007\/978-3-540-75530-2_7","volume-title":"Proceedings of the International Symposium on String Processing Information Retrieval","author":"R Baeza-Yates","year":"2007","unstructured":"Baeza-Yates R, Junqueira F, Plachouras V, Witschel H (2007a) Admission policies for caches of search engine results. In: Ziviani N, Baeza-Yates R (eds) Proceedings of the International Symposium on String Processing Information Retrieval. Lecture Notes in Computer Science, vol 4726. Springer, Berlin\/Heidelberg, pp 74\u201385"},{"key":"2_CR55","first-page":"183","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"R Baeza-Yates","year":"2007","unstructured":"Baeza-Yates R, Gionis A, Junqueira F, Murdock V, Plachouras V, Silvestri F (2007b) The impact of caching on search engines. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 183\u2013190"},{"key":"2_CR56","first-page":"6","volume-title":"Proceedings of the International Conference on Data Engineering","author":"R Baeza-Yates","year":"2007","unstructured":"Baeza-Yates R, Castillo C, Junqueira F, Plachouras V, Silvestri F (2007c) Challenges in distributed information retrieval. In: Proceedings of the International Conference on Data Engineering. IEEE CS, New York, NY, pp 6\u201320"},{"key":"2_CR57","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1145\/1645953.1646009","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"R Baeza-Yates","year":"2009","unstructured":"Baeza-Yates R, Gionis A, Junqueira F, Plachouras V, Telloli L (2009a) On the feasibility of multi-site web search engines. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 425\u2013434"},{"key":"2_CR58","first-page":"163","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"R Baeza-Yates","year":"2009","unstructured":"Baeza-Yates R, Murdock V, Hauff C (2009b) Efficiency trade-offs in two-tier web search systems. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 163\u2013170"},{"key":"2_CR65","doi-asserted-by":"crossref","unstructured":"Barroso L, H\u00f6lzle U (2009) The Datacenter as a Computer. Synthesis Lectures on Computer Architecture. Morgan & Claypool","DOI":"10.1007\/978-3-031-01722-3"},{"issue":"2","key":"2_CR66","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1109\/MM.2003.1196112","volume":"23","author":"L Barroso","year":"2003","unstructured":"Barroso L, Dean J, H\u00f6lzle U (2003) Web search for a planet: The Google cluster architecture. IEEE Micro 23(2):22\u201328","journal-title":"IEEE Micro"},{"key":"2_CR95","first-page":"1579","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"K Bharat","year":"1999","unstructured":"Bharat K, Broder AZ (1999) Mirror, mirror on the Web: A study of host pairs with replicated content. In: Proceedings of the International Conference on the World Wide Web. Elsevier\/North-Holland, New York, NY, pp 1579\u20131590"},{"issue":"12","key":"2_CR96","doi-asserted-by":"publisher","first-page":"1114","DOI":"10.1002\/1097-4571(2000)9999:9999<::AID-ASI1025>3.0.CO;2-0","volume":"51","author":"K Bharat","year":"2000","unstructured":"Bharat K, Broder A, Dean J, Henzinger M (2000) A comparison of techniques to find mirrored hosts on the WWW. Journal of the American Society for Information Science 51(12):1114\u20131122","journal-title":"Journal of the American Society for Information Science"},{"issue":"4","key":"2_CR104","doi-asserted-by":"publisher","first-page":"499","DOI":"10.1007\/s10791-006-6614-y","volume":"9","author":"R Blanco","year":"2006","unstructured":"Blanco R, Barreiro A (2006) TSP and cluster-based solutions to the reassignment of document identifiers. Journal of Information Retrieval 9(4):499\u2013517","journal-title":"Journal of Information Retrieval"},{"key":"2_CR105","first-page":"82","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"R Blanco","year":"2010","unstructured":"Blanco R, Bortnikov E, Junqueira F, Lempel R, Telloli L, Zaragoza H (2010) Caching search engine results over incremental indices. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 82\u201389"},{"key":"2_CR106","first-page":"342","volume-title":"Proceedings of the Data Compression Conference","author":"D Blandford","year":"2002","unstructured":"Blandford D, Blelloch G (2002) Index compression through document reordering. In: Proceedings of the Data Compression Conference. IEEE Computer Society, Washington, DC, pp 342\u2013351"},{"issue":"8","key":"2_CR109","doi-asserted-by":"publisher","first-page":"711","DOI":"10.1002\/spe.587","volume":"34","author":"P Boldi","year":"2004","unstructured":"Boldi P, Codenotti B, Santini M, Vigna S (2004) UbiCrawler: a scalable fully distributed web crawler. Software: Practice and Experience 34(8):711\u2013726","journal-title":"Software: Practice and Experience"},{"key":"2_CR110","doi-asserted-by":"crossref","first-page":"609","DOI":"10.1145\/1458082.1458163","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"P Boldi","year":"2008","unstructured":"Boldi P, Bonchi F, Castillo C, Donato D, Gionis A, Vigna S (2008) The query-flow graph: Model and applications. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 609\u2013618"},{"issue":"1\u20137","key":"2_CR117","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/S0169-7552(98)00110-X","volume":"30","author":"S Brin","year":"1998","unstructured":"Brin S, Page L (1998) The anatomy of a large-scale hypertextual web search engine. Computer Networks and ISDN Systems 30(1\u20137):107\u2013117","journal-title":"Computer Networks and ISDN Systems"},{"key":"2_CR118","doi-asserted-by":"publisher","first-page":"1157","DOI":"10.1016\/S0169-7552(97)00031-7","volume":"29","author":"A Broder","year":"1997","unstructured":"Broder A, Glassman S, Manasse M, Zweig G (1997) Syntactic clustering of the Web. Computer Networks and ISDN Systems 29:1157\u20131166","journal-title":"Computer Networks and ISDN Systems"},{"key":"2_CR119","first-page":"426","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"A Broder","year":"2003","unstructured":"Broder A, Carmel D, Herscovici M, Soffer A, Zien J (2003a) Efficient query evaluation using a two-level retrieval process. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 426\u2013434"},{"key":"2_CR120","doi-asserted-by":"crossref","first-page":"679","DOI":"10.1145\/775152.775247","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"A Broder","year":"2003","unstructured":"Broder A, Najork M, Wiener J (2003b) Efficient URL caching for World Wide Web crawling. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 679\u2013689"},{"key":"2_CR125","first-page":"30","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"E Brown","year":"1995","unstructured":"Brown E (1995) Fast evaluation of structured queries for information retrieval. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 30\u201338"},{"key":"2_CR131","first-page":"97","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"C Buckley","year":"1985","unstructured":"Buckley C, Lewit A (1985) Optimization of inverted vector searches. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 97\u2013110"},{"key":"2_CR135","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1145\/1099554.1099645","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"S B\u00fcttcher","year":"2005","unstructured":"B\u00fcttcher S, Clarke C (2005) Indexing time vs query time: trade-offs in dynamic information retrieval systems. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 317\u2013318"},{"key":"2_CR136","first-page":"356","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"S B\u00fcttcher","year":"2006","unstructured":"B\u00fcttcher S, Clarke C, Lushman B (2006a) Hybrid index maintenance for growing text collections. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 356\u2013363"},{"key":"2_CR137","first-page":"621","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"S B\u00fcttcher","year":"2006","unstructured":"B\u00fcttcher S, Clarke C, Lushman B (2006b) Term proximity scoring for ad-hoc retrieval on very large text collections. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 621\u2013622"},{"issue":"1","key":"2_CR139","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1016\/j.ipm.2006.06.002","volume":"43","author":"F Cacheda","year":"2007","unstructured":"Cacheda F, Carneiro V, Plachouras V, Ounis I (2007) Performance analysis of distributed information retrieval architectures using an improved network simulation model. Information Processing and Management 43(1):204\u2013224","journal-title":"Information Processing and Management"},{"issue":"1","key":"2_CR141","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/333135.333136","volume":"18","author":"B Cahoon","year":"2000","unstructured":"Cahoon B, McKinley K, Lu Z (2000) Evaluating the performance of distributed architectures for information retrieval using a variety of workloads. ACM Transactions on Information Systems 18(1):1\u201343","journal-title":"ACM Transactions on Information Systems"},{"key":"2_CR146","first-page":"21","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"J Callan","year":"1995","unstructured":"Callan J, Lu Z, Croft W (1995b) Searching distributed collections with inference networks. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 21\u201328"},{"issue":"4","key":"2_CR147","doi-asserted-by":"publisher","first-page":"875","DOI":"10.1016\/j.ipm.2005.06.004","volume":"42","author":"B Cambazoglu","year":"2006","unstructured":"Cambazoglu B, Aykanat C (2006) Performance of query processing implementations in ranking-based text retrieval systems using inverted indices. Information Processing and Management 42(4):875\u2013898","journal-title":"Information Processing and Management"},{"key":"2_CR148","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"801","DOI":"10.1007\/978-3-540-30182-0_80","volume-title":"Proceedings of the Symposium on Computer and Information Sciences","author":"B Cambazoglu","year":"2004","unstructured":"Cambazoglu B, Turk A, Aykanat C (2004) Data-parallel web crawling models. In: Proceedings of the Symposium on Computer and Information Sciences. Lecture Notes in Computer Science. Springer, Berlin\/Heidelberg, pp 801\u2013809"},{"key":"2_CR149","first-page":"1","volume-title":"Proceedings of the International Conference on Scalable Information Systems","author":"B Cambazoglu","year":"2008","unstructured":"Cambazoglu B, Plachouras V, Junqueira F, Telloli L (2008) On the feasibility of geographically distributed web crawling. In: Proceedings of the International Conference on Scalable Information Systems. ICST (Institute for Computer Sciences and Social-Informatics and Telecommunications Engineering), ICST, Brussels, pp 1\u201310"},{"key":"2_CR150","first-page":"411","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"B Cambazoglu","year":"2009","unstructured":"Cambazoglu B, Plachouras V, Baeza-Yates R (2009) Quantifying performance and quality gains in distributed web search engines. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 411\u2013418"},{"key":"2_CR151","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1145\/1718487.1718538","volume-title":"Proceedings of the ACM Conference on Web Search and Data Mining","author":"B Cambazoglu","year":"2010","unstructured":"Cambazoglu B, Zaragoza H, Chapelle O, Chen J, Liao C, Zheng Z, Degenhardt J (2010a) Early exit optimizations for additive machine learned ranking systems. In: Proceedings of the ACM Conference on Web Search and Data Mining. ACM Press, New York, NY, pp 411\u2013420"},{"key":"2_CR152","first-page":"90","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"B Cambazoglu","year":"2010","unstructured":"Cambazoglu B, Varol E, Kayaaslan E, Aykanat C, Baeza-Yates R (2010b) Query forwarding in geographically distributed search engines. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 90\u201397"},{"key":"2_CR153","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1145\/1772690.1772710","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"B Cambazoglu","year":"2010","unstructured":"Cambazoglu B, Junqueira F, Plachouras V, Banachowski S, Cui B, Lim S, Bridge B (2010c) A refreshing perspective of search engine caching. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 181\u2013190"},{"key":"2_CR162","first-page":"43","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"D Carmel","year":"2001","unstructured":"Carmel D, Cohen D, Fagin R, Farchi E, Herscovici M, Maarek Y, Soffer A (2001) Static index pruning for information retrieval systems. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 43\u201350"},{"key":"2_CR168","first-page":"212","volume-title":"Proceedings of the Latin American Conference on World Wide Web","author":"C Castillo","year":"2003","unstructured":"Castillo C (2003) Cooperation schemes between a web server and a web search engine. In: Proceedings of the Latin American Conference on World Wide Web. IEEE CS, New York, NY, pp 212\u2013213"},{"issue":"11\u201316","key":"2_CR172","first-page":"1623","volume":"31","author":"S Chakrabarti","year":"1999","unstructured":"Chakrabarti S, van\u00a0den Berg M, Dom B (1999) Focused crawling: A new approach to topic-specific web resource discovery. Computer Networks and ISDN Systems 31(11\u201316):1623\u20131640","journal-title":"Computer Networks and ISDN Systems"},{"key":"2_CR176","first-page":"200","volume-title":"Proceedings of the International Conference on Very Large Data Bases","author":"J Cho","year":"2000","unstructured":"Cho J, Garcia-Molina H (2000) The evolution of the Web and implications for an incremental crawler. In: Proceedings of the International Conference on Very Large Data Bases. Morgan Kaufmann, San Francisco, CA, pp 200\u2013209"},{"key":"2_CR177","first-page":"124","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"J Cho","year":"2002","unstructured":"Cho J, Garcia-Molina H (2002) Parallel crawlers. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 124\u2013135"},{"issue":"4","key":"2_CR178","doi-asserted-by":"publisher","first-page":"390","DOI":"10.1145\/958942.958945","volume":"28","author":"J Cho","year":"2003","unstructured":"Cho J, Garcia-Molina H (2003) Effective page refresh policies for web crawlers. ACM Transactions on Database Systems 28(4):390\u2013426","journal-title":"ACM Transactions on Database Systems"},{"issue":"1\u20137","key":"2_CR179","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1016\/S0169-7552(98)00108-1","volume":"30","author":"J Cho","year":"1998","unstructured":"Cho J, Garcia-Molina H, Page L (1998) Efficient crawling through URL ordering. Computer Networks and ISDN Systems 30(1\u20137):161\u2013172","journal-title":"Computer Networks and ISDN Systems"},{"issue":"2","key":"2_CR180","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1145\/335191.335429","volume":"29","author":"J Cho","year":"2000","unstructured":"Cho J, Shivakumar N, Garcia-Molina H (2000) Finding replicated web collections. ACM SIGMOD Record 29(2):355\u2013366","journal-title":"ACM SIGMOD Record"},{"key":"2_CR181","first-page":"435","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"A Chowdhury","year":"2003","unstructured":"Chowdhury A, Pass G (2003) Operational requirements for scalable search systems. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 435\u2013442"},{"issue":"2","key":"2_CR182","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/506309.506311","volume":"20","author":"A Chowdhury","year":"2002","unstructured":"Chowdhury A, Frieder O, Grossman D, McCabe M (2002) Collection statistics for fast duplicate document detection. ACM Transactions on Information Systems 20(2):171\u2013191","journal-title":"ACM Transactions on Information Systems"},{"key":"2_CR185","first-page":"34","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"C Chung","year":"2002","unstructured":"Chung C, Clarke CA (2002) Topic-oriented collaborative crawling. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 34\u201342"},{"key":"2_CR187","unstructured":"Clarke CA, Cormack G, Burkowski F (1994) Fast inverted indexes with on-line update. Tech Rep CS-94-40, University of Waterloo"},{"key":"2_CR188","first-page":"135","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"CA Clarke","year":"2007","unstructured":"Clarke CA, Agichtein E, Dumais S, White R (2007) The influence of caption features on clickthrough patterns in web search. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 135\u2013142"},{"key":"2_CR194","first-page":"245","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"J Cooper","year":"2002","unstructured":"Cooper J, Coden A, Brown E (2002) Detecting similar documents using salient terms. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 245\u2013251"},{"key":"2_CR206","first-page":"405","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"D Cutting","year":"1990","unstructured":"Cutting D, Pedersen J (1990) Optimization for dynamic inverted index maintenance. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 405\u2013411"},{"key":"2_CR209","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1145\/1242572.1242630","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"A Dasgupta","year":"2007","unstructured":"Dasgupta A, Ghosh A, Kumar R, Olston C, Pandey S, Tomkins A (2007) The discoverability of the Web. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 421\u2013430"},{"key":"2_CR212","first-page":"66","volume-title":"Proceedings of the International Conference on Distributed Computing Systems","author":"O de Kretser","year":"1998","unstructured":"de Kretser O, Moffat A, Shimmin T, Zobel J (1998) Methodologies for distributed information retrieval. In: Proceedings of the International Conference on Distributed Computing Systems. IEEE Computer Society, Washington, DC, p 66"},{"issue":"1)","key":"2_CR214","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean J, Ghemawat S (2008) MapReduce: simplified data processing on large clusters. Communications of the ACM 51(1)):107\u2013113","journal-title":"Communications of the ACM"},{"key":"2_CR223","first-page":"527","volume-title":"Proceedings of the International Conference on Very Large Data Bases","author":"M Diligenti","year":"2000","unstructured":"Diligenti M, Coetzee F, Lawrence S, Giles C, Gori M (2000) Focused crawling using context graphs. In: Proceedings of the International Conference on Very Large Data Bases. Morgan Kaufmann, San Francisco, CA, pp 527\u2013534"},{"key":"2_CR224","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1145\/1772690.1772723","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"S Ding","year":"2010","unstructured":"Ding S, Attenberg J, Suel T (2010) Scalable techniques for document identifier assignment in inverted indexes. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 311\u2013320"},{"issue":"3","key":"2_CR228","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1016\/S0306-4573(03)00008-6","volume":"40","author":"D D\u2019Souza","year":"2004","unstructured":"D\u2019Souza D, Thom J, Zobel J (2004) Collection selection for managed distributed document databases. Information Processing and Management 40(3):527\u2013546","journal-title":"Information Processing and Management"},{"key":"2_CR233","first-page":"106","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"J Edwards","year":"2001","unstructured":"Edwards J, McCurley K, Tomlin J (2001) An adaptive model for optimizing performance of an incremental web crawler. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 106\u2013113"},{"issue":"1\u20132","key":"2_CR236","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1016\/0169-7552(95)00107-3","volume":"28","author":"D Eichmann","year":"1995","unstructured":"Eichmann D (1995) Ethical web agents. Computer Networks and ISDN Systems 28(1\u20132):127\u2013136","journal-title":"Computer Networks and ISDN Systems"},{"key":"2_CR244","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1145\/1096985.1096999","volume-title":"Proceedings of the Workshop on Geographic Information Retrieval","author":"J Exposto","year":"2005","unstructured":"Exposto J, Macedo J, Pina A, Alves A, Rufino J (2005) Geographical partition for distributed web crawling. In: Proceedings of the Workshop on Geographic Information Retrieval. ACM Press, New York, NY, pp 55\u201360"},{"key":"2_CR245","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1007\/978-3-540-89524-4_54","volume-title":"Proceedings of the International Conference on Information Networking: Towards Ubiquitous Networking and Services","author":"J Exposto","year":"2008","unstructured":"Exposto J, Macedo J, Pina A, Alves A, Rufino J (2008) Efficient partitioning strategies for distributed web crawling. In: Proceedings of the International Conference on Information Networking: Towards Ubiquitous Networking and Services. Lecture Notes in Computer Science. Springer, Berlin\/Heidelberg, pp 544\u2013553"},{"issue":"1","key":"2_CR246","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1145\/1125857.1125859","volume":"24","author":"T Fagni","year":"2006","unstructured":"Fagni T, Perego R, Silvestri F, Orlando S (2006) Boosting the performance of web search engines: Caching and prefetching query results by exploiting historical usage data. ACM Transactions on Information Systems 24(1):51\u201378","journal-title":"ACM Transactions on Information Systems"},{"issue":"2","key":"2_CR252","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1002\/spe.577","volume":"34","author":"D Fetterly","year":"2004","unstructured":"Fetterly D, Manasse M, Najork M, Wiener J (2004) A large-scale study of the evolution of web pages. Software: Practice and Experience 34(2):213\u2013237","journal-title":"Software: Practice and Experience"},{"key":"2_CR253","first-page":"580","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"D Fetterly","year":"2009","unstructured":"Fetterly D, Craswell N, Vinay V (2009) The impact of crawl policy on web search effectiveness. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 580\u2013587"},{"key":"2_CR265","unstructured":"Fox E, Lee W (1991) FAST-INV: A fast algorithm for building large inverted files. Tech Rep 91\u201310, Virginia Polytechnic Institute and State University"},{"key":"2_CR273","first-page":"431","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"Q Gan","year":"2009","unstructured":"Gan Q, Suel T (2009) Improved techniques for result caching in web search engines. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 431\u2013440"},{"key":"2_CR274","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1145\/1135777.1135822","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"W Gao","year":"2006","unstructured":"Gao W, Lee H, Miao Y (2006) Geographically focused collaborative crawling. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 287\u2013296"},{"key":"2_CR279","first-page":"78","volume-title":"Proceedings of the International Conference on Very Large Data Bases","author":"L Gravano","year":"1995","unstructured":"Gravano L, Garcia-Molina H (1995) Generalizing GlOSS to vector-space databases and broker hierarchies. In: Proceedings of the International Conference on Very Large Data Bases. Morgan Kaufmann, San Francisco, CA, pp 78\u201389"},{"key":"2_CR286","unstructured":"Gy\u00f6ngyi Z, Garcia-Molina H (2005a) Link spam alliances. In: Proceedings of the International Conference on Very Large Data Bases. VLDB Endowment, pp\u00a0517\u2013528"},{"key":"2_CR287","unstructured":"Gy\u00f6ngyi Z, Garcia-Molina H (2005b) Web spam taxonomy. http:\/\/airweb.cse.lehigh.edu\/2005\/gyongyi.pdf, visited on February, 2011"},{"key":"2_CR288","doi-asserted-by":"crossref","unstructured":"Gy\u00f6ngyi Z, Garcia-Molina H, Pedersen J (2004) Combating web spam with TrustRank. In: Proceedings of the International Conference on Very Large Data Bases. VLDB Endowment, pp\u00a0576\u2013587","DOI":"10.1016\/B978-012088469-8.50052-8"},{"issue":"8","key":"2_CR298","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1002\/(SICI)1097-4571(199012)41:8<581::AID-ASI4>3.0.CO;2-U","volume":"41","author":"D Harman","year":"1990","unstructured":"Harman D, Candela G (1990) Retrieving records from a gigabyte of text on a mini-computer using statistical ranking. Journal of the American Society for Information Science 41(8):581\u2013589","journal-title":"Journal of the American Society for Information Science"},{"key":"2_CR299","first-page":"28","volume-title":"Information Retrieval: Data Structures and Algorithms","author":"D Harman","year":"1992","unstructured":"Harman D, Baeza-Yates R, Fox E, Lee W (1992) Inverted files. In: Baeza-Yates WBFR (ed) Information Retrieval: Data Structures and Algorithms. Prentice-Hall, Upper Saddle River, NJ, pp 28\u201343"},{"key":"2_CR302","first-page":"127","volume-title":"Proceedings of the European Conference on Digital Libraries","author":"D Hawking","year":"1997","unstructured":"Hawking D (1997) Scalable text retrieval for large digital libraries. In: Proceedings of the European Conference on Digital Libraries. Springer, London, pp 127\u2013145"},{"issue":"8","key":"2_CR311","doi-asserted-by":"crossref","first-page":"713","DOI":"10.1002\/asi.10268","volume":"54","author":"S Heinz","year":"2003","unstructured":"Heinz S, Zobel J (2003) Efficient single-pass index construction for text databases. Journal of the American Society for Information Science 54(8):713\u2013729","journal-title":"Journal of the American Society for Information Science"},{"key":"2_CR313","first-page":"284","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"M Henzinger","year":"2006","unstructured":"Henzinger M (2006) Finding near-duplicate web pages: A large-scale evaluation of algorithms. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 284\u2013291"},{"issue":"4","key":"2_CR315","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1023\/A:1019213109274","volume":"2","author":"A Heydon","year":"1999","unstructured":"Heydon A, Najork M (1999) Mercator: a scalable, extensible web crawler. World Wide Web 2(4):219\u2013229","journal-title":"World Wide Web"},{"key":"2_CR319","first-page":"277","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"J Hirai","year":"2000","unstructured":"Hirai J, Raghavan S, Garcia-Molina H, Paepcke A (2000) WebBase: a repository of web pages. In: Proceedings of the International Conference on the World Wide Web. North-Holland, Amsterdam, pp 277\u2013293"},{"key":"2_CR346","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1145\/775152.775191","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"G Jeh","year":"2003","unstructured":"Jeh G, Widom J (2003) Scaling personalized web search. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 271\u2013279"},{"issue":"2","key":"2_CR348","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1109\/71.342125","volume":"6","author":"BS Jeong","year":"1995","unstructured":"Jeong BS, Omiecinski E (1995) Inverted file partitioning schemes in multiple disk systems. IEEE Transactions on Parallel and Distributed Systems 6(2):142\u2013153","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"2_CR349","first-page":"133","volume-title":"Proceedings of the ACM Conference on Knowledge Discovery and Data Mining","author":"T Joachims","year":"2002","unstructured":"Joachims T (2002) Optimizing search engines using clickthrough data. In: Proceedings of the ACM Conference on Knowledge Discovery and Data Mining. ACM Press, New York, NY, pp 133\u2013142"},{"issue":"2","key":"2_CR354","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1145\/276305.276316","volume":"27","author":"B J\u00f3nsson","year":"1998","unstructured":"J\u00f3nsson B, Franklin M, Srivastava D (1998) Interaction of query evaluation and buffer management for information retrieval. ACM SIGMOD Record 27(2):118\u2013129","journal-title":"ACM SIGMOD Record"},{"key":"2_CR359","doi-asserted-by":"crossref","unstructured":"Kayaaslan E, Cambazoglu B, Aykanat C (2010) Document replication strategies for geographically distributed Web search engines. To be submitted","DOI":"10.1145\/1835449.1835467"},{"key":"2_CR377","unstructured":"Kulkarni A, Callan J (2010) Topic-based index partitions for efficient and effective selective search. http:\/\/www.lsdsir.org\/, visited on February, 2011"},{"key":"2_CR384","first-page":"282","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"L Larkey","year":"2000","unstructured":"Larkey L, Connell M, Callan J (2000) Collection selection and results merging with topically organized US patents and TREC data. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 282\u2013289"},{"issue":"1","key":"2_CR388","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1145\/333175.333181","volume":"11","author":"S Lawrence","year":"2000","unstructured":"Lawrence S, Giles C (2000) Accessibility of information on the Web. Intelligence 11(1):32\u201339","journal-title":"Intelligence"},{"key":"2_CR389","first-page":"427","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"HT Lee","year":"2008","unstructured":"Lee HT, Leonard D, Wang X, Loguinov D (2008) IRLbot: Scaling to 6 billion pages and beyond. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 427\u2013436"},{"key":"2_CR390","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1145\/775152.775156","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"R Lempel","year":"2003","unstructured":"Lempel R, Moran S (2003) Predictive caching and prefetching of query results in search engines. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 19\u201328"},{"key":"2_CR391","first-page":"15","volume-title":"Proceedings of the Australasian Database Conference","author":"N Lester","year":"2004","unstructured":"Lester N, Zobel J, Williams H (2004) In-place versus re-build versus re-merge: Index maintenance strategies for text retrieval systems. In: Proceedings of the Australasian Database Conference. Australian Computer Society, Darlinghurst, pp 15\u201323"},{"issue":"3","key":"2_CR392","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1386118.1386125","volume":"33","author":"N Lester","year":"2008","unstructured":"Lester N, Moffat A, Zobel J (2008) Efficient online index construction for text databases. ACM Transactions on Database Systems 33(3):1\u201333","journal-title":"ACM Transactions on Database Systems"},{"issue":"6","key":"2_CR395","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1177\/0165551508089396","volume":"34","author":"D Lewandowskii","year":"2008","unstructured":"Lewandowskii D (2008) A three-year study on the freshness of web search engine databases. Journal of Information Science 34(6):817\u2013831","journal-title":"Journal of Information Science"},{"key":"2_CR400","first-page":"186","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"X Liu","year":"2004","unstructured":"Liu X, Croft W (2004) Cluster-based retrieval using language models. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 186\u2013193"},{"key":"2_CR402","first-page":"558","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"F Liu","year":"2002","unstructured":"Liu F, Yu C, Meng W (2002) Personalized web search by mapping user queries to categories. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 558\u2013565"},{"key":"2_CR407","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1145\/1060745.1060785","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"X Long","year":"2005","unstructured":"Long X, Suel T (2005) Three-level caching for efficient query processing in large web search engines. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 257\u2013266"},{"key":"2_CR408","first-page":"97","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"Z Lu","year":"1999","unstructured":"Lu Z, McKinley K (1999) Partial replica selection based on relevance for information retrieval. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 97\u2013104"},{"key":"2_CR409","first-page":"248","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"Z Lu","year":"2000","unstructured":"Lu Z, McKinley K (2000) Partial collection replication versus caching for information retrieval systems. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 248\u2013255"},{"key":"2_CR410","first-page":"1","volume-title":"Proceedings of the International Conference on Scalable Information Systems","author":"C Lucchese","year":"2007","unstructured":"Lucchese C, Orlando S, Perego R, Silvestri F (2007) Mining query logs to optimize index partitioning in parallel web search engines. In: Proceedings of the International Conference on Scalable Information Systems. ICST (Institute for Computer Sciences, Social-Informatics and Telecommunications Engineering), Brussels, Belgium, pp 1\u20139"},{"key":"2_CR415","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1109\/SPIRE.2000.878197","volume-title":"Proceedings of the International Symposium on String Processing Information Retrieval","author":"A MacFarlane","year":"2000","unstructured":"MacFarlane A, McCann J, Robertson S (2000) Parallel search using partitioned inverted files. In: Proceedings of the International Symposium on String Processing Information Retrieval. IEEE Computer Society, Washington, DC, pp 209\u2013220"},{"issue":"2","key":"2_CR425","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1016\/S0140-3664(00)00308-X","volume":"24","author":"E Markatos","year":"2001","unstructured":"Markatos E (2001) On caching search engine query results. Computer Communications 24(2):137\u2013143","journal-title":"Computer Communications"},{"issue":"3","key":"2_CR429","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1145\/502115.502116","volume":"19","author":"S Melnik","year":"2001","unstructured":"Melnik S, Raghavan S, Yang B, Garcia-Molina H (2001) Building a distributed full-text index for the Web. ACM Transactions on Information Systems 19(3):217\u2013241","journal-title":"ACM Transactions on Information Systems"},{"issue":"7","key":"2_CR440","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1002\/(SICI)1097-4571(199508)46:7<537::AID-ASI7>3.0.CO;2-P","volume":"46","author":"A Moffat","year":"1995","unstructured":"Moffat A, Bell TH (1995) In situ generation of compressed inverted files. Journal of the American Society for Information Science 46(7):537\u2013550","journal-title":"Journal of the American Society for Information Science"},{"issue":"1","key":"2_CR441","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1023\/A:1013002601898","volume":"3","author":"A Moffat","year":"2000","unstructured":"Moffat A, Stuiver L (2000) Binary interpolative coding for effective index compression. Journal of Information Retrieval 3(1):25\u201347","journal-title":"Journal of Information Retrieval"},{"issue":"4","key":"2_CR442","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1145\/237496.237497","volume":"14","author":"A Moffat","year":"1996","unstructured":"Moffat A, Zobel J (1996) Self-indexing inverted files for fast text retrieval. ACM Transactions on Information Systems 14(4):349\u2013379","journal-title":"ACM Transactions on Information Systems"},{"issue":"3","key":"2_CR443","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1007\/s10791-006-9014-4","volume":"10","author":"A Moffat","year":"2007","unstructured":"Moffat A, Webber W, Zobel J, Baeza-Yates R (2007) A pipelined architecture for distributed text query evaluation. Journal of Information Retrieval 10(3):205\u2013231","journal-title":"Journal of Information Retrieval"},{"key":"2_CR446","first-page":"114","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"M Najork","year":"2001","unstructured":"Najork M, Wiener J (2001) Breadth-first crawling yields high-quality pages. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 114\u2013118"},{"key":"2_CR456","first-page":"191","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"A Ntoulas","year":"2007","unstructured":"Ntoulas A, Cho J (2007) Pruning policies for two-tiered inverted index with correctness guarantee. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 191\u2013198"},{"key":"2_CR457","first-page":"1","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"A Ntoulas","year":"2004","unstructured":"Ntoulas A, Cho J, Olston C (2004) What\u2019s new on the Web?: The evolution of the Web from a search engine perspective. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 1\u201312"},{"key":"2_CR460","first-page":"437","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"C Olston","year":"2008","unstructured":"Olston C, Pandey S (2008) Recrawl scheduling based on information longevity. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 437\u2013446"},{"key":"2_CR462","first-page":"1169","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"R Ozcan","year":"2008","unstructured":"Ozcan R, Altingovde I, Ulusoy O (2008) Static query result caching revisited. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 1169\u20131170"},{"key":"2_CR463","unstructured":"Page L, Brin S, Motwani R, Winograd T (1999) The PageRank citation ranking: Bringing order to the Web. http:\/\/ilpubs.stanford.edu:8090\/422\/, visited on February, 2011"},{"key":"2_CR465","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1145\/1060745.1060805","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"S Pandey","year":"2005","unstructured":"Pandey S, Olston C (2005) User-centric web crawling. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 401\u2013411"},{"key":"2_CR466","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1145\/1341531.1341535","volume-title":"Proceedings of the ACM Conference on web Search and Data Mining","author":"S Pandey","year":"2008","unstructured":"Pandey S, Olston C (2008) Crawl ordering by search impact. In: Proceedings of the ACM Conference on web Search and Data Mining. ACM Press, New York, NY, pp 3\u201314"},{"key":"2_CR471","first-page":"339","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"M Persin","year":"1994","unstructured":"Persin M (1994) Document filtering for fast ranking. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 339\u2013348"},{"issue":"9","key":"2_CR477","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1145\/567498.567526","volume":"45","author":"J Pitkow","year":"2002","unstructured":"Pitkow J, Sch\u00fctze H, Cass T, Cooley R, Turnbull D, Edmonds A, Adar E, Breuel T (2002) Personalized search. Communications of the ACM 45(9):50\u201355","journal-title":"Communications of the ACM"},{"issue":"2","key":"2_CR484","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1740592.1740593","volume":"28","author":"D Puppin","year":"2010","unstructured":"Puppin D, Silvestri F, Perego R, Baeza-Yates R (2010) Tuning the capacity of search engines: Load-driven routing and incremental caching to reduce and balance the load. ACM Transactions on Information Systems 28(2):1\u201336","journal-title":"ACM Transactions on Information Systems"},{"issue":"4","key":"2_CR489","doi-asserted-by":"publisher","first-page":"384","DOI":"10.1016\/S0140-3664(01)00410-8","volume":"25","author":"P Radoslavov","year":"2002","unstructured":"Radoslavov P, Govindan R, Estrin D (2002) Topology-informed Internet replica placement. Computer Communications 25(4):384\u2013392","journal-title":"Computer Communications"},{"key":"2_CR490","doi-asserted-by":"publisher","first-page":"781","DOI":"10.1145\/1772690.1772770","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"D Rafiei","year":"2010","unstructured":"Rafiei D, Bharat K, Shukla A (2010) Diversifying web search results. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 781\u2013790"},{"key":"2_CR491","first-page":"129","volume-title":"Proceedings of the International Conference on Very Large Data Bases","author":"S Raghavan","year":"2001","unstructured":"Raghavan S, Garcia-Molina H (2001) Crawling the hidden Web. In: Proceedings of the International Conference on Very Large Data Bases. Morgan Kaufmann, San Francisco, CA, pp 129\u2013138"},{"key":"2_CR492","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1007\/3-540-36618-0_15","volume-title":"Proceedings of the European Conference on Information Retrieval","author":"Y Rasolofo","year":"2003","unstructured":"Rasolofo Y, Savoy J (2003) Term proximity scoring for keyword-based retrieval systems. In: Sebastiani F (ed) Proceedings of the European Conference on Information Retrieval. Lecture Notes in Computer Science, vol 2633. Springer, Berlin\/Heidelberg, pp\u00a079. doi:10.1007\/3-540-36618-0_15, visited on December, 2010"},{"key":"2_CR495","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1145\/276675.276695","volume-title":"Proceedings of the ACM Conference on Digital Libraries","author":"B Ribeiro-Neto","year":"1998","unstructured":"Ribeiro-Neto B, Barbosa R (1998) Query performance for tightly coupled distributed digital libraries. In: Proceedings of the ACM Conference on Digital Libraries. ACM Press, New York, NY, pp 182\u2013190"},{"key":"2_CR496","first-page":"149","volume-title":"Proceedings of the Conference of the Chilean Computer Science Society","author":"B Ribeiro-Neto","year":"1998","unstructured":"Ribeiro-Neto B, Kitajima J, Navarro G, Sant\u2019Ana C, Ziviani N (1998) Parallel generation of inverted files for distributed text collections. In: Proceedings of the Conference of the Chilean Computer Science Society. IEEE Computer Society, Washington, DC, pp 149\u2013157"},{"key":"2_CR497","first-page":"105","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"B Ribeiro-Neto","year":"1999","unstructured":"Ribeiro-Neto B, Moura E, Neubert M, Ziviani N (1999) Efficient distributed algorithms to build inverted files. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 105\u2013112"},{"key":"2_CR501","first-page":"132","volume-title":"Proceedings of the Latin American Conference on World Wide Web","author":"K Risvik","year":"2003","unstructured":"Risvik K, Aasheim Y, Lidal M (2003) Multi-tier architecture for web search engines. In: Proceedings of the Latin American Conference on World Wide Web. IEEE Computer Society, Washington, DC, p 132"},{"key":"2_CR527","first-page":"51","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"P Saraiva","year":"2001","unstructured":"Saraiva P, Silva\u00a0de Moura E, Ziviani N, Meira W, Fonseca R, Riberio-Neto B (2001) Rank-preserving two-level caching for scalable search engines. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 51\u201358"},{"key":"2_CR528","first-page":"595","volume-title":"Proceedings of the European Conference on Information Retrieval","author":"C Sarigiannis","year":"2009","unstructured":"Sarigiannis C, Plachouras V, Baeza-Yates R (2009) A study of the impact of index updates on distributed query processing for web search. In: Proceedings of the European Conference on Information Retrieval. Springer, Berlin\/Heidelberg, pp 595\u2013602"},{"key":"2_CR531","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1007\/978-3-540-75530-2_26","volume-title":"Proceedings of the International Symposium on String Processing Information Retrieval","author":"R Schenkel","year":"2007","unstructured":"Schenkel R, Broschart A, Hwang S, Theobald M, Weikum G (2007) Efficient text proximity search. In: Proceedings of the International Symposium on String Processing Information Retrieval. Lecture Notes in Computer Science, vol 4726. Springer, Berlin\/Heidelberg, pp 287\u2013299"},{"key":"2_CR532","first-page":"222","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"F Scholer","year":"2002","unstructured":"Scholer F, Williams H, Yiannis J, Zobel J (2002) Compression of inverted indexes for fast query evaluation. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 222\u2013229"},{"key":"2_CR533","unstructured":"Schurman E, Brutlag J (2009) Performance related changes and their user impact. http:\/\/velocityconference.blip.tv\/file\/2279751\/, visited on February, 2011"},{"issue":"2","key":"2_CR539","doi-asserted-by":"crossref","first-page":"275","DOI":"10.1016\/S0306-4573(03)00087-6","volume":"41","author":"WY Shieh","year":"2005","unstructured":"Shieh WY, Chung CP (2005) A statistics-based approach to incrementally update inverted files. Information Processing and Management 41(2):275\u2013288","journal-title":"Information Processing and Management"},{"issue":"1","key":"2_CR540","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1016\/S0306-4573(02)00020-1","volume":"39","author":"WY Shieh","year":"2003","unstructured":"Shieh WY, Chen TF, Shann J, Chung CP (2003) Inverted file compression through document identifier reassignment. Information Processing and Management 39(1):117\u2013131","journal-title":"Information Processing and Management"},{"key":"2_CR541","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/ICDE.2002.994750","volume-title":"Proceedings of the International Conference on Data Engineering","author":"V Shkapenyuk","year":"2002","unstructured":"Shkapenyuk V, Suel T (2002) Design and implementation of a high-performance distributed web crawler. In: Proceedings of the International Conference on Data Engineering. IEEE Computer Society, Washington, DC, p 357"},{"key":"2_CR546","first-page":"391","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"L Si","year":"2002","unstructured":"Si L, Jin R, Callan J, Ogilvie P (2002a) A language modeling framework for resource selection and results merging. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 391\u2013397"},{"key":"2_CR548","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1007\/978-3-540-71496-5_12","volume-title":"Proceedings of the European Conference on Information Retrieval","author":"F Silvestri","year":"2007","unstructured":"Silvestri F (2007) Sorting out the document identifier assignment problem. In: Amati G, Carpineto C, Romano G (eds) Proceedings of the European Conference on Information Retrieval. Lecture Notes in Computer Science, vol 4425. Springer, Berlin\/Heidelberg, pp 101\u2013112"},{"key":"2_CR549","first-page":"305","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"F Silvestri","year":"2004","unstructured":"Silvestri F, Orlando S, Perego R (2004) Assigning identifiers to documents to enhance the clustering property of fulltext indexes. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 305\u2013312"},{"key":"2_CR550","first-page":"131","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"G Skobeltsyn","year":"2008","unstructured":"Skobeltsyn G, Junqueira F, Plachouras V, Baeza-Yates R (2008) ResIn: a combination of results caching and index pruning for high-performance web search engines. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 131\u2013138"},{"key":"2_CR557","first-page":"219","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"T Strohman","year":"2005","unstructured":"Strohman T, Turtle H, Croft W (2005) Optimization strategies for complex queries. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 219\u2013225"},{"key":"2_CR560","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1145\/1060745.1060803","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"JT Sun","year":"2005","unstructured":"Sun JT, Zeng HJ, Liu H, Lu Y, Chen Z (2005) CubeSVD: a novel approach to personalized web search. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 382\u2013390"},{"key":"2_CR567","doi-asserted-by":"publisher","first-page":"718","DOI":"10.1145\/1150402.1150493","volume-title":"Proceedings of the ACM Conference on Knowledge Discovery and Data Mining","author":"B Tan","year":"2006","unstructured":"Tan B, Shen X, Zhai C (2006) Mining long-term search history to improve search accuracy. In: Proceedings of the ACM Conference on Knowledge Discovery and Data Mining. ACM Press, New York, NY, pp 718\u2013723"},{"key":"2_CR571","first-page":"449","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"J Teevan","year":"2005","unstructured":"Teevan J, Dumais S, Horvitz E (2005) Personalizing search via automated analysis of interests and activities. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 449\u2013456"},{"issue":"2","key":"2_CR574","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1145\/170036.170063","volume":"22","author":"A Tomasic","year":"1993","unstructured":"Tomasic A, Garcia-Molina H (1993) Caching and database scaling in distributed shared-nothing information retrieval systems. ACM SIGMOD Record 22(2):129\u2013138","journal-title":"ACM SIGMOD Record"},{"key":"2_CR575","first-page":"289","volume-title":"Proceedings of the ACM Conference on Management of Data","author":"A Tomasic","year":"1994","unstructured":"Tomasic A, Garcia-Molina H, Shoens K (1994) Incremental updates of inverted lists for text document retrieval. In: Proceedings of the ACM Conference on Management of Data. ACM Press, New York, NY, pp 289\u2013300"},{"issue":"3","key":"2_CR576","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1145\/256163.256165","volume":"15","author":"A Tomasic","year":"1997","unstructured":"Tomasic A, Gravano L, Lue C, Schwarz P, Haas L (1997) Data structures for efficient broker implementation. ACM Transactions on Information Systems 15(3):223\u2013253","journal-title":"ACM Transactions on Information Systems"},{"key":"2_CR579","unstructured":"Tonellotto N, Macdonald C, Ounis I (2010) Efficient dynamic pruning with proximity support. http:\/\/www.lsdsir.org\/wp-content\/uploads\/2010\/05\/lsdsir10-5.pdf, visited on February, 2011"},{"key":"2_CR582","first-page":"127","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"A Turpin","year":"2007","unstructured":"Turpin A, Tsegay Y, Hawking D, Williams H (2007) Fast generation of result snippets in web search. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 127\u2013134"},{"issue":"6","key":"2_CR583","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1016\/0306-4573(95)00020-H","volume":"31","author":"H Turtle","year":"1995","unstructured":"Turtle H, Flood J (1995) Query evaluation: Strategies and optimizations. Information Processing and Management 31(6):831\u2013850","journal-title":"Information Processing and Management"},{"key":"2_CR593","doi-asserted-by":"publisher","first-page":"622","DOI":"10.1145\/1183614.1183703","volume-title":"Proceedings of the ACM Conference on Information and Knowledge Management","author":"R Varadarajan","year":"2006","unstructured":"Varadarajan R, Hristidis V (2006) A system for query-specific document summarization. In: Proceedings of the ACM Conference on Information and Knowledge Management. ACM Press, New York, NY, pp 622\u2013631"},{"key":"2_CR603","first-page":"138","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"L Wang","year":"2010","unstructured":"Wang L, Lin J, Metzler D (2010) Learning to efficiently rank. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 138\u2013145"},{"key":"2_CR617","volume-title":"Managing Gigabytes: Compressing and Indexing Documents and Images","author":"I Witten","year":"1999","unstructured":"Witten I, Moffat A, Bell T (1999) Managing Gigabytes: Compressing and Indexing Documents and Images, 2nd edn. Morgan Kaufmann, San Francisco, CA","edition":"2"},{"key":"2_CR618","first-page":"136","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"J Wolf","year":"2002","unstructured":"Wolf J, Squillante M, Yu P, Sethuraman J, Ozsen L (2002) Optimal crawling strategies for web search engines. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 136\u2013147"},{"issue":"5","key":"2_CR619","doi-asserted-by":"publisher","first-page":"647","DOI":"10.1016\/0306-4573(93)90085-R","volume":"29","author":"WP Wong","year":"1993","unstructured":"Wong WP, Lee D (1993) Implementations of partial document ranking using inverted files. Information Processing and Management 29(5):647\u2013669","journal-title":"Information Processing and Management"},{"key":"2_CR623","first-page":"112","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"J Xu","year":"1998","unstructured":"Xu J, Callan J (1998) Effective retrieval with distributed collections. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 112\u2013120"},{"key":"2_CR624","first-page":"254","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"J Xu","year":"1999","unstructured":"Xu J, Croft W (1999) Cluster-based language models for distributed retrieval. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 254\u2013261"},{"key":"2_CR625","first-page":"147","volume-title":"Proceedings of the ACM Conference on Research and Development in Information Retrieval","author":"H Yan","year":"2009","unstructured":"Yan H, Ding S, Suel T (2009a) Compressing term positions in web indexes. In: Proceedings of the ACM Conference on Research and Development in Information Retrieval. ACM Press, New York, NY, pp 147\u2013154"},{"key":"2_CR626","first-page":"401","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"H Yan","year":"2009","unstructured":"Yan H, Ding S, Suel T (2009b) Inverted index compression and query processing with optimized document ordering. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 401\u2013410"},{"key":"2_CR632","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1145\/1718487.1718540","volume-title":"Proceedings of the ACM Conference on web Search and Data Mining","author":"F Yu","year":"2010","unstructured":"Yu F, Xie Y, Ke Q (2010) Sbotminer: Large scale search bot detection. In: Proceedings of the ACM Conference on web Search and Data Mining. ACM Press, New York, NY, pp 421\u2013430"},{"key":"2_CR633","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1142\/9789812819536_0005","volume-title":"Proceedings of the International Conference on Database Systems for Advanced Applications","author":"B Yuwono","year":"1997","unstructured":"Yuwono B, Lee D (1997) Server ranking for distributed text retrieval systems on the Internet. In: Proceedings of the International Conference on Database Systems for Advanced Applications. World Scientific, Singapore, pp 41\u201350"},{"key":"2_CR636","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1007\/3-540-45431-4_6","volume-title":"Proceedings of the International Workshop on Next Generation Information Technologies and Systems","author":"D Zeinalipour-Yazti","year":"2002","unstructured":"Zeinalipour-Yazti D, Dikaiakos M (2002) Design and implementation of a distributed crawler and filtering processor. In: Proceedings of the International Workshop on Next Generation Information Technologies and Systems. Springer, London, pp 58\u201374"},{"key":"2_CR637","first-page":"387","volume-title":"Proceedings of the International Conference on the World Wide Web","author":"J Zhang","year":"2008","unstructured":"Zhang J, Long X, Suel T (2008) Performance of compressed inverted list caching in search engines. In: Proceedings of the International Conference on the World Wide Web. ACM Press, New York, NY, pp 387\u2013396"},{"issue":"2","key":"2_CR641","doi-asserted-by":"publisher","DOI":"10.1145\/1132956.1132959","volume":"38","author":"J Zobel","year":"2006","unstructured":"Zobel J, Moffat A (2006) Inverted files for text search engines. ACM Computing Surveys 38(2):6","journal-title":"ACM Computing Surveys"}],"container-title":["The Information Retrieval Series","Advanced Topics in Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-20946-8_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,30]],"date-time":"2023-01-30T14:24:29Z","timestamp":1675088669000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-20946-8_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642209451","9783642209468"],"references-count":167,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-20946-8_2","relation":{},"ISSN":["1387-5264"],"issn-type":[{"value":"1387-5264","type":"print"}],"subject":[],"published":{"date-parts":[[2011]]}}}