{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,5]],"date-time":"2025-03-05T01:10:27Z","timestamp":1741137027134,"version":"3.38.0"},"publisher-location":"Berlin, Heidelberg","reference-count":20,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642201608"},{"type":"electronic","value":"9783642201615"}],"license":[{"start":{"date-parts":[[2011,1,1]],"date-time":"2011-01-01T00:00:00Z","timestamp":1293840000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-20161-5_47","type":"book-chapter","created":{"date-parts":[[2011,4,12]],"date-time":"2011-04-12T07:41:23Z","timestamp":1302594083000},"page":"479-490","source":"Crossref","is-referenced-by-count":2,"title":["Rules of Thumb for Information Acquisition from Large and Redundant Data"],"prefix":"10.1007","author":[{"given":"Wolfgang","family":"Gatterbauer","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"47_CR1","doi-asserted-by":"crossref","unstructured":"Achlioptas, D., Clauset, A., Kempe, D., Moore, C.: On the bias of traceroute sampling: or, power-law degree distributions in regular graphs. In: STOC, pp. 694\u2013703 (2005)","DOI":"10.1145\/1060590.1060693"},{"key":"47_CR2","unstructured":"Adamic, L.A.: Zipf, power-law, pareto \u2013 a ranking tutorial. Technical report, Information Dynamics Lab, HP Labs, Palo Alto, CA 94304 (October 2000)"},{"key":"47_CR3","doi-asserted-by":"crossref","unstructured":"Bernstein, Y., Zobel, J.: Redundant documents and search effectiveness. In: CIKM, pp. 736\u2013743 (2005)","DOI":"10.1145\/1099554.1099733"},{"issue":"1","key":"47_CR4","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1002\/aris.1440370109","volume":"37","author":"R. Capurro","year":"2003","unstructured":"Capurro, R., Hj\u00f8rland, B.: The concept of information. Annual Review of Information Science and Technology\u00a037(1), 343\u2013411 (2003)","journal-title":"Annual Review of Information Science and Technology"},{"issue":"5","key":"47_CR5","doi-asserted-by":"publisher","first-page":"1461","DOI":"10.1073\/pnas.0610487104","volume":"104","author":"C. Cattuto","year":"2007","unstructured":"Cattuto, C., Loreto, V., Pietronero, L.: Semiotic dynamics and collaborative tagging. PNAS\u00a0104(5), 1461\u20131464 (2007)","journal-title":"PNAS"},{"key":"47_CR6","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Church, K.W., K\u00f6nig, A.C., Sui, L.: Heavy-tailed distributions and multi-keyword queries. In: SIGIR, pp. 663\u2013670 (2007)","DOI":"10.1145\/1277741.1277855"},{"issue":"4","key":"47_CR7","doi-asserted-by":"publisher","first-page":"661","DOI":"10.1137\/070710111","volume":"51","author":"A. Clauset","year":"2009","unstructured":"Clauset, A., Shalizi, C.R., Newman, M.: Power-law distributions in empirical data. SIAM Review\u00a051(4), 661\u2013703 (2009)","journal-title":"SIAM Review"},{"key":"47_CR8","doi-asserted-by":"crossref","unstructured":"Downey, D., Etzioni, O., Soderland, S.: A probabilistic model of redundancy in information extraction. In: IJCAI, pp. 1034\u20131041 (2005)","DOI":"10.21236\/ADA454763"},{"key":"47_CR9","first-page":"59","volume":"AG","author":"P. Flajolet","year":"2006","unstructured":"Flajolet, P., Dumas, P., Puyhaubert, V.: Some exactly solvable models of urn process theory. Discrete Math. & Theoret. Comput. Sci.\u00a0AG, 59\u2013118 (2006)","journal-title":"Discrete Math. & Theoret. Comput. Sci."},{"key":"47_CR10","doi-asserted-by":"crossref","unstructured":"Flajolet, P., Sedgewick, R.: Analytic combinatorics. CUP (2009)","DOI":"10.1017\/CBO9780511801655"},{"issue":"2","key":"47_CR11","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1137\/0405018","volume":"5","author":"D. Gardy","year":"1992","unstructured":"Gardy, D.: Normal limiting distributions for projection and semijoin sizes. SIAM Journal on Discrete Mathematics\u00a05(2), 219\u2013248 (1992)","journal-title":"SIAM Journal on Discrete Mathematics"},{"key":"47_CR12","doi-asserted-by":"crossref","unstructured":"Gatterbauer, W.: Estimating Required Recall for Successful Knowledge Acquisition from the Web. In: WWW, pp. 969\u2013970 (2006)","DOI":"10.1145\/1135777.1135969"},{"key":"47_CR13","doi-asserted-by":"crossref","unstructured":"Gatterbauer, W.: Rules of thumb for information acquisition from large and redundant data. CoRR abs\/1012.3502 (2010)","DOI":"10.1007\/978-3-642-20161-5_47"},{"key":"47_CR14","unstructured":"Haas, P.J., Naughton, J.F., Seshadri, S., Stokes, L.: Sampling-based estimation of the number of distinct values of an attribute. In: VLDB, pp. 311\u2013322 (1995)"},{"key":"47_CR15","doi-asserted-by":"crossref","unstructured":"Ipeirotis, P.G., Agichtein, E., Jain, P., Gravano, L.: To search or to crawl? towards a query optimizer for text-centric tasks. In: SIGMOD, pp. 265\u2013276 (2006)","DOI":"10.1145\/1142473.1142504"},{"issue":"2","key":"47_CR16","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1080\/15427951.2004.10129088","volume":"1","author":"M. Mitzenmacher","year":"2004","unstructured":"Mitzenmacher, M.: A brief history of generative models for power law and lognormal distributions. Internet Mathematics\u00a01(2), 226\u2013251 (2004)","journal-title":"Internet Mathematics"},{"issue":"5","key":"47_CR17","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1080\/00107510500052444","volume":"46","author":"M.E. Newman","year":"2005","unstructured":"Newman, M.E.: Power laws, pareto distributions and zipf\u2019s law. Contemporary Physics\u00a046(5), 323\u2013351 (2005)","journal-title":"Contemporary Physics"},{"key":"47_CR18","doi-asserted-by":"crossref","unstructured":"Soboroff, I., Harman, D.: Overview of the trec 2003 novelty track. In: TREC 2003. NIST, pp. 38\u201353 (2003)","DOI":"10.6028\/NIST.SP.500-255.novelty-overview"},{"issue":"12","key":"47_CR19","doi-asserted-by":"publisher","first-page":"4221","DOI":"10.1073\/pnas.0501179102","volume":"102","author":"M.P.H. Stumpf","year":"2005","unstructured":"Stumpf, M.P.H., Wiuf, C., May, R.M.: Subnets of scale-free networks are not scale-free: sampling properties of networks. PNAS\u00a0102(12), 4221\u20134224 (2005)","journal-title":"PNAS"},{"key":"47_CR20","volume-title":"Human Behaviour and the Principle of Least Effort: an Introduction to Human Ecology","author":"G.K. Zipf","year":"1949","unstructured":"Zipf, G.K.: Human Behaviour and the Principle of Least Effort: an Introduction to Human Ecology. Addison-Wesley, Reading (1949)"}],"container-title":["Lecture Notes in Computer Science","Advances in Information Retrieval"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-20161-5_47","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,5]],"date-time":"2025-03-05T00:49:33Z","timestamp":1741135773000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-20161-5_47"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642201608","9783642201615"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-20161-5_47","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2011]]}}}