{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T15:11:20Z","timestamp":1781104280949,"version":"3.54.1"},"reference-count":17,"publisher":"National Academy of Sciences","issue":"32","license":[{"start":{"date-parts":[[2018,7,23]],"date-time":"2018-07-23T00:00:00Z","timestamp":1532304000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":["www.pnas.org"],"crossmark-restriction":true},"short-container-title":["Proc. Natl. Acad. Sci. U.S.A."],"published-print":{"date-parts":[[2018,8,7]]},"abstract":"<jats:title>Significance<\/jats:title>\n                  <jats:p>We present a tractable algorithm that provides a near-optimal solution to the crawling problem, a fundamental challenge at the heart of web search: Given a large quantity of distributed and dynamic web content, what pages do we choose to update a local cache with the goal of serving up-to-date pages to client requests? Solving this optimization requires identifying the best set of pages to refresh given popularity rates and change rates\u2014an intractable problem in the general case. To overcome this intractability, we show that the optimal randomized strategy can be efficiently determined (in near-linear time) and then use it to produce a deterministic policy that exhibits excellent performance in experiments.<\/jats:p>","DOI":"10.1073\/pnas.1801519115","type":"journal-article","created":{"date-parts":[[2018,7,23]],"date-time":"2018-07-23T15:15:17Z","timestamp":1532358917000},"page":"8099-8103","update-policy":"https:\/\/doi.org\/10.1073\/pnas.cm10313","source":"Crossref","is-referenced-by-count":18,"title":["Tractable near-optimal policies for crawling"],"prefix":"10.1073","volume":"115","author":[{"given":"Yossi","family":"Azar","sequence":"first","affiliation":[{"name":"Department of Computer Science, Tel-Aviv University, Tel-Aviv 69978, Israel;"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Eric","family":"Horvitz","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA 98052;"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Eyal","family":"Lubetzky","sequence":"additional","affiliation":[{"name":"Courant Institute of Mathematical Sciences, New York University, New York, NY 10012;"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuval","family":"Peres","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA 98052;"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dafna","family":"Shahaf","sequence":"additional","affiliation":[{"name":"School of Computer Science, Hebrew University, Jerusalem 91904, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"341","published-online":{"date-parts":[[2018,7,23]]},"reference":[{"key":"e_1_3_2_1_2","doi-asserted-by":"publisher","DOI":"10.1145\/958942.958945"},{"key":"e_1_3_2_2_2","doi-asserted-by":"crossref","unstructured":"J Cho H Garcia-Molina Synchronizing a database to improve freshness. Proceedings of the 2000 ACM SIGMOD International Conference on Management of Data May 16-18 2000 Dallas TX (Association for Computing Machinery New York) 117\u2013128. (2000).","DOI":"10.1145\/335191.335391"},{"key":"e_1_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/857166.857170"},{"key":"e_1_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1099-1425(199806)1:1<15::AID-JOS3>3.0.CO;2-K"},{"key":"e_1_3_2_5_2","unstructured":"C Castillo Effective web crawling. PhD thesis (University of Chile Santiago). Available at www.dcc.uchile.cl\/tesis\/doctorado\/Castillo_Ocaranza.pdf. (2004)."},{"key":"e_1_3_2_6_2","first-page":"126","volume-title":"Proceedings of the 1999 IEEE INFOCOM","author":"Breslau L","year":"1999","unstructured":"L Breslau, P Cao, L Fan, G Phillips, S Shenker, Web caching and Zipf-like distributions: Evidence and implications. Proceedings of the 1999 IEEE INFOCOM (IEEE, New York), pp. 126\u2013134 (1999)."},{"key":"e_1_3_2_7_2","doi-asserted-by":"crossref","unstructured":"A Bonato A Course on the Web Graph Graduate Studies in Mathematics (American Mathematical Society AARMS Providence RI) Vol 89 pp. xii 184. (2008).","DOI":"10.1090\/gsm\/089"},{"key":"e_1_3_2_8_2","first-page":"175","volume-title":"Proceedings of the 1998 ACM Conference on Information and Knowledge Management","author":"Horvitz E","year":"1998","unstructured":"E Horvitz, Continual computation policies for utility-directed prefetching. Proceedings of the 1998 ACM Conference on Information and Knowledge Management (Association for Computing Machinery, New York), pp. 175\u2013184 (1998)."},{"key":"e_1_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(00)00082-5"},{"key":"e_1_3_2_10_2","unstructured":"D Shahaf E Horvitz Investigations of continual computation in IJCAI 2009 Proceedings of the 21st International Joint Conference on Artificial Intelligence Pasadena CA USA (Association for the Advancement of Artifical Intelligence Palo Alto CA) July 11-17 2009 pp. 285\u2013291. (2009)."},{"key":"e_1_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/504794.504797"},{"key":"e_1_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2007.1041"},{"key":"e_1_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1016\/S0166-218X(97)00119-4"},{"key":"e_1_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1287\/moor.27.3.518.314"},{"key":"e_1_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/321738.321743"},{"key":"e_1_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1016\/0012-365X(80)90269-1"},{"key":"e_1_3_2_17_2","unstructured":"O Angel AE Holroyd JB Martin J Propp Discrete low-discrepancy sequences. arXiv:0910.1077. (2009)."}],"container-title":["Proceedings of the National Academy of Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pnas.org\/doi\/pdf\/10.1073\/pnas.1801519115","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,13]],"date-time":"2022-04-13T03:45:55Z","timestamp":1649821555000},"score":1,"resource":{"primary":{"URL":"https:\/\/pnas.org\/doi\/full\/10.1073\/pnas.1801519115"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,7,23]]},"references-count":17,"journal-issue":{"issue":"32","published-print":{"date-parts":[[2018,8,7]]}},"alternative-id":["10.1073\/pnas.1801519115"],"URL":"https:\/\/doi.org\/10.1073\/pnas.1801519115","relation":{},"ISSN":["0027-8424","1091-6490"],"issn-type":[{"value":"0027-8424","type":"print"},{"value":"1091-6490","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,7,23]]},"assertion":[{"value":"2018-07-23","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}