{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,11]],"date-time":"2025-12-11T20:32:21Z","timestamp":1765485141486},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2007,9,7]],"date-time":"2007-09-07T00:00:00Z","timestamp":1189123200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2008,3]]},"DOI":"10.1007\/s00778-007-0065-y","type":"journal-article","created":{"date-parts":[[2007,9,6]],"date-time":"2007-09-06T01:34:44Z","timestamp":1189042484000},"page":"173-201","source":"Crossref","is-referenced-by-count":43,"title":["Maintaining bounded-size sample synopses of evolving datasets"],"prefix":"10.1007","volume":"17","author":[{"given":"Rainer","family":"Gemulla","sequence":"first","affiliation":[]},{"given":"Wolfgang","family":"Lehner","sequence":"additional","affiliation":[]},{"given":"Peter J.","family":"Haas","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2007,9,7]]},"reference":[{"key":"65_CR1","unstructured":"Babcock, B., Datar, M., Motwani, R.: Sampling from a moving window over streaming data. In: Proc. SODA, pp. 633\u2013634 (2002)"},{"key":"65_CR2","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1007\/11499923_9","volume-title":"Data Management in a Connected World","author":"P. Brown","year":"2005","unstructured":"Brown P., Haas P., Myllymaki J., Pirahesh H., Reinwald B. and Sismanis Y. (2005). Toward automated large-scale information integration and discovery. In: H\u00e4rder, T. and Lehner, W. (eds) Data Management in a Connected World, pp 161\u2013180. Springer, Heidelberg"},{"key":"65_CR3","doi-asserted-by":"crossref","unstructured":"Brown, P., Haas, P.J.: BHUNT: automatic discovery of fuzzy algebraic constraints in relational data. In: Proc. VLDB, pp. 668\u2013679 (2003)","DOI":"10.1016\/B978-012722442-8\/50065-3"},{"key":"65_CR4","doi-asserted-by":"crossref","unstructured":"Brown, P.G., Haas, P.J.: Techniques for warehousing of sample data. In: Proc. ICDE (2006)","DOI":"10.1109\/ICDE.2006.157"},{"key":"65_CR5","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Motwani, R., Narasayya, V.R.: On random sampling over joins. In: Proc. ACM SIGMOD, pp. 263\u2013274 (1999)","DOI":"10.1145\/304182.304206"},{"key":"65_CR6","unstructured":"Colt Library: Open source libraries for high performance scientific and technical computing in Java. http:\/\/dsd.lbl.gov\/ hoschek\/colt\/"},{"key":"65_CR7","doi-asserted-by":"crossref","unstructured":"Cormode, G., Muthukrishnan, S., Rozenbaum, I.: Summarizing and mining inverse distributions on data streams via dynamic inverse sampling. In: Proc. VLDB, pp. 25\u201336 (2005)","DOI":"10.1137\/1.9781611972757.5"},{"key":"65_CR8","doi-asserted-by":"crossref","first-page":"387","DOI":"10.1080\/01621459.1962.10480667","volume":"57","author":"C. Fan","year":"1962","unstructured":"Fan C., Muller M. and Rezucha I. (1962). Development of sampling plans by using sequential (item by item) techniques and digital computers. J. Am. Statist. Assoc. 57: 387\u2013402","journal-title":"J. Am. Statist. Assoc."},{"key":"65_CR9","doi-asserted-by":"crossref","unstructured":"Frahling, G., Indyk, P., Sohler, C.: Sampling in dynamic data streams and applications. In: Proc. 21st Symp. Computat. Geom., pp. 142\u2013149 (2005)","DOI":"10.1145\/1064092.1064116"},{"key":"65_CR10","doi-asserted-by":"crossref","unstructured":"Gemulla, R., Lehner, W.: Deferred maintenance of disk-based random samples. In: Proc. EDBT, pp. 423\u2013441 (2006)","DOI":"10.1007\/11687238_27"},{"key":"65_CR11","unstructured":"Gemulla, R., Lehner, W., Haas, P.J.: A dip in the reservoir: Maintaining sample synopses of evolving datasets. In: Proc. VLDB, pp. 595\u2013606 (2006)"},{"key":"65_CR12","doi-asserted-by":"crossref","unstructured":"Gemulla, R., Lehner, W., Haas, P.J.: Maintaining Bernoulli samples over evolving multisets. In: Proc. ACM PODS, pp. 93\u2013102 (2007)","DOI":"10.1145\/1265530.1265544"},{"key":"65_CR13","volume-title":"AQUA project white paper","author":"P. Gibbons","year":"1997","unstructured":"Gibbons P., Matias Y. and Poosala V. (1997). AQUA project white paper. Tech. rep., Bell Laboratories, Murray Hill"},{"key":"65_CR14","doi-asserted-by":"crossref","unstructured":"Gibbons, P.B., Matias, Y.: New sampling-based summary statistics for improving approximate query answers. In: Proc. ACM SIGMOD, pp. 331\u2013342 (1998)","DOI":"10.1145\/276305.276334"},{"key":"65_CR15","doi-asserted-by":"crossref","first-page":"182","DOI":"10.1145\/581751.581753","volume":"27","author":"P.B. Gibbons","year":"2002","unstructured":"Gibbons P.B., Matias Y. and Poosala V. (2002). Fast incremental maintenance of approximate histograms. ACM Trans. Database Syst. 27: 182\u2013184","journal-title":"ACM Trans. Database Syst."},{"key":"65_CR16","unstructured":"GSL: GNU Scientific Library. http:\/\/www.gnu.org\/software\/gsl\/"},{"key":"65_CR17","doi-asserted-by":"crossref","unstructured":"Haas, P., K\u00f6nig, C.: A bi-level Bernoulli scheme for database sampling. In: Proc. ACM SIGMOD, pp. 275\u2013286 (2004)","DOI":"10.1145\/1007568.1007601"},{"key":"65_CR18","unstructured":"Haas, P.J.: Data stream sampling: Basic techniques and results. In: Garofalakis, M., Gehrke, J., Rastogi, R. (eds.) Data Stream Management: Processing High Speed Data Streams, Springer, Heidelberg (2007)"},{"key":"65_CR19","unstructured":"Halevy, A.Y., Etzioni, O., Doan, A., Ives, Z.G., Madhavan, J., McDowell, L., Tatarinov, I.: Join synopses for approximate query answering. In: Proc. CIDR (2003)"},{"key":"65_CR20","doi-asserted-by":"crossref","unstructured":"Hellerstein, J.M., Haas, P.J., Wang, H.J.: Online aggregation. In: Proc. ACM SIGMOD, pp. 171\u2013182 (1997)","DOI":"10.1145\/253262.253291"},{"key":"65_CR21","unstructured":"IBM Corporation: WebSphere Profile Stage User\u2019s Manual (2005)"},{"key":"65_CR22","doi-asserted-by":"crossref","unstructured":"Ilyas, I.F., Markl, V., Haas, P.J., Brown, P., Aboulnaga, A.: CORDS: automatic discovery of correlations and soft functional dependencies. In: Proc. ACM SIGMOD, pp. 647\u2013658 (2004)","DOI":"10.1145\/1007568.1007641"},{"key":"65_CR23","doi-asserted-by":"crossref","unstructured":"Jermaine, C., Pol, A., Arumugam, S.: Online maintenance of very large random samples. In: Proc. ACM SIGMOD, pp. 299\u2013310 (2004)","DOI":"10.1145\/1007568.1007603"},{"key":"65_CR24","unstructured":"John, G.H., Langley, P.: Static versus dynamic sampling for data mining. In: Proc. KDD, pp. 367\u2013370 (2005)"},{"key":"65_CR25","volume-title":"Discrete Univariate Distributions, 2nd edn","author":"N.L. Johnson","year":"1992","unstructured":"Johnson N.L., Kotz S. and Kemp A.W. (1992). Discrete Univariate Distributions, 2nd edn. Wiley, New York"},{"key":"65_CR26","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1080\/00949658508810839","volume":"22","author":"V. Kachitvichyanukul","year":"1985","unstructured":"Kachitvichyanukul V. and Schmeiser B. (1985). Computer generation of hypergeometric random variables. J. Stat. Comput. Simul 22: 127\u2013145","journal-title":"J. Stat. Comput. Simul"},{"key":"65_CR27","doi-asserted-by":"crossref","unstructured":"Kivinen, J., Mannila, H.: The power of sampling in knowledge discovery. In: Proc. ACM PODS, pp. 77\u201385 (1994)","DOI":"10.1145\/182591.182601"},{"key":"65_CR28","unstructured":"Knuth, D.E.: The Art of Computer Programming, vol. 2: Seminumerical Algorithms, 1st edn. Addison-Wesley, Reading (1969)"},{"key":"65_CR29","volume-title":"Simulation Modeling and Analysis, 4th edn","author":"A.M. Law","year":"2007","unstructured":"Law A.M. (2007). Simulation Modeling and Analysis, 4th edn. McGraw-Hill, New York"},{"key":"65_CR30","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1016\/S0927-0507(06)13003-0","volume-title":"Simulation","author":"P.\u00a0 L\u2019Ecuyer","year":"2006","unstructured":"L\u2019Ecuyer P.\u00a0 (2006). Uniform random number generation. In: Henderson, S.G. and Nelson, B.L. (eds) Simulation, pp 55\u201381. Elsevier, Amsterdam"},{"key":"65_CR31","doi-asserted-by":"crossref","unstructured":"Leser, U., Naumann, F.: (Almost) hands-off information integration for the life sciences. In: Proc. CIDR, pp. 131\u2013143 (2005)","DOI":"10.1007\/11799511"},{"issue":"1","key":"65_CR32","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1145\/272991.272995","volume":"8","author":"M. Matsumoto","year":"1998","unstructured":"Matsumoto M. and Nishimura T. (1998). Mersenne twister: a 623-dimensionally equidistributed uniform pseudo-random number generator. ACM Trans. Model. Comput. Simul. 8(1): 3\u201330","journal-title":"ACM Trans. Model. Comput. Simul."},{"key":"65_CR33","doi-asserted-by":"crossref","first-page":"182","DOI":"10.2307\/2347297","volume":"32","author":"A.I. McLeod","year":"1983","unstructured":"McLeod A.I. and Bellhouse D.R. (1983). A convenient algorithm for drawing a simple random sample. Appl. Statist. 32: 182\u2013184","journal-title":"Appl. Statist."},{"key":"65_CR34","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511810633","volume-title":"Markov Chains","author":"J.R. Norris","year":"1997","unstructured":"Norris J.R. (1997). Markov Chains. Cambridge University Press, Cambridge"},{"key":"65_CR35","unstructured":"Olken, F.: Random sampling from databases. Thesis LBL-32883, Information and Computing Sciences Division, Lawrence Berkeley National Laboratory (1993)"},{"key":"65_CR36","doi-asserted-by":"crossref","unstructured":"Olken, F., Rotem, D.: Maintenance of materialized views of sampling queries. In: Proc. ICDE (1992)","DOI":"10.1109\/ICDE.1992.213145"},{"key":"65_CR37","doi-asserted-by":"crossref","unstructured":"Poosala, V., Haas, P.J., Ioannidis, Y.E., Shekita, E.J.: Improved histograms for selectivity estimation of range predicates. In: Proc. ACM SIGMOD, pp. 294\u2013305 (1996)","DOI":"10.1145\/233269.233342"},{"key":"65_CR38","volume-title":"Numerical Recipes in C, 2nd edn","author":"W.H. Press","year":"1992","unstructured":"Press W.H., Teukolsky S.A., Vetterling W.T. and Flannery B.P. (1992). Numerical Recipes in C, 2nd edn. Cambridge University Press, Cambridge"},{"key":"65_CR39","doi-asserted-by":"crossref","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H. Robbins","year":"1951","unstructured":"Robbins H. and Monro S. (1951). A stochastic approximation method. Ann. Math. Statist. 22: 400\u2013407","journal-title":"Ann. Math. Statist."},{"key":"65_CR40","volume-title":"Stochastic Processes","author":"S.M. Ross","year":"1983","unstructured":"Ross S.M. (1983). Stochastic Processes. Wiley, New York"},{"key":"65_CR41","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4612-4378-6","volume-title":"Model Assisted Survey Sampling","author":"C.E. S\u00e4rndal","year":"1992","unstructured":"S\u00e4rndal C.E., Swensson B. and Wretman J. (1992). Model Assisted Survey Sampling. Springer, Heidelberg"},{"key":"65_CR42","doi-asserted-by":"crossref","DOI":"10.1002\/0471722138","volume-title":"Introduction to Stochastic Search and Optimization","author":"J.C. Spall","year":"2003","unstructured":"Spall J.C. (2003). Introduction to Stochastic Search and Optimization. Wiley, New York"},{"key":"65_CR43","doi-asserted-by":"crossref","unstructured":"Tatbul, N., \u00c7etintemel, U., Zdonik, S.B., Cherniack, M., Stonebraker, M.: Load shedding in a data stream manager. In: Proc. VLDB, pp. 309\u2013320 (2003)","DOI":"10.1016\/B978-012722442-8\/50035-5"},{"issue":"7","key":"65_CR44","doi-asserted-by":"crossref","first-page":"703","DOI":"10.1145\/358105.893","volume":"27","author":"J.S. Vitter","year":"1984","unstructured":"Vitter J.S. (1984). Faster methods for random sampling. Commun. ACM 27(7): 703\u2013718","journal-title":"Commun. ACM"},{"issue":"1","key":"65_CR45","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1145\/3147.3165","volume":"11","author":"J.S. Vitter","year":"1985","unstructured":"Vitter J.S. (1985). Random sampling with a reservoir. ACM Trans. Math. Softw. 11(1): 37\u201357","journal-title":"ACM Trans. Math. Softw."},{"key":"65_CR46","unstructured":"Zechner, H.: Efficient sampling from continuous and discrete distributions. Ph.D. thesis, Technical University Graz (1997)"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-007-0065-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00778-007-0065-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-007-0065-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,29]],"date-time":"2019-05-29T11:05:02Z","timestamp":1559127902000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00778-007-0065-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2007,9,7]]},"references-count":46,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2008,3]]}},"alternative-id":["65"],"URL":"https:\/\/doi.org\/10.1007\/s00778-007-0065-y","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2007,9,7]]}}}