{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T01:56:27Z","timestamp":1750989387998,"version":"3.37.3"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,10,21]],"date-time":"2021-10-21T00:00:00Z","timestamp":1634774400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,10,21]],"date-time":"2021-10-21T00:00:00Z","timestamp":1634774400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of Chin","doi-asserted-by":"crossref","award":["U1866602, 61772157"],"award-info":[{"award-number":["U1866602, 61772157"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Distrib Parallel Databases"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s10619-021-07376-5","type":"journal-article","created":{"date-parts":[[2021,10,21]],"date-time":"2021-10-21T18:07:42Z","timestamp":1634839662000},"page":"165-200","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["MISS: finding optimal sample sizes for approximate analytics"],"prefix":"10.1007","volume":"40","author":[{"given":"Xuebin","family":"Su","sequence":"first","affiliation":[]},{"given":"Hongzhi","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,21]]},"reference":[{"key":"7376_CR1","unstructured":"Tpc-h benchmark (2017). http:\/\/www.tpc.org\/tpch\/"},{"key":"7376_CR2","doi-asserted-by":"publisher","unstructured":"Agarwal, S., Milner, H., Kleiner, A., Talwalkar, A., Jordan, M.I., Madden, S., Mozafari, B., Stoica, I.: Knowing when you\u2019re wrong: building fast and reliable approximate query processing systems. In: International Conference on Management of Data, SIGMOD 2014, Snowbird, UT, USA, 22\u201327 June 2014, pp. 481\u2013492 (2014). https:\/\/doi.org\/10.1145\/2588555.2593667","DOI":"10.1145\/2588555.2593667"},{"key":"7376_CR3","doi-asserted-by":"publisher","unstructured":"Agarwal, S., Mozafari, B., Panda, A., Milner, H., Madden, S., Stoica, I.: Blinkdb: queries with bounded errors and bounded response times on very large data. In: Eighth Eurosys Conference 2013, EuroSys \u201913, Prague, Czech Republic, 14\u201317 April 2013, pp. 29\u201342 (2013). https:\/\/doi.org\/10.1145\/2465351.2465355","DOI":"10.1145\/2465351.2465355"},{"key":"7376_CR4","doi-asserted-by":"publisher","unstructured":"Alabi, D., Wu, E.: Pfunk-h: approximate query processing using perceptual models. In: Proceedings of the Workshop on Human-In-the-Loop Data Analytics, HILDA@SIGMOD 2016, San Francisco, CA, USA, 26 June\u201301 July 2016, p.\u00a010 (2016) https:\/\/doi.org\/10.1145\/2939502.2939512","DOI":"10.1145\/2939502.2939512"},{"key":"7376_CR5","unstructured":"Amaran, S., Sahinidis, N.V., Sharda, B., Bury, S.J.: Simulation optimization: A review of algorithms and applications. CoRR (2017). arxiv:1706.08591"},{"key":"7376_CR6","doi-asserted-by":"crossref","unstructured":"Bhatia, R., Davis, C.: A better bound on the variance. Am. Math. Mon. 107(4), 353\u2013357 (2000). http:\/\/www.jstor.org\/stable\/2589180","DOI":"10.1080\/00029890.2000.12005203"},{"key":"7376_CR7","volume-title":"Empirical Model-building and Response Surface","author":"GEP Box","year":"1986","unstructured":"Box, G.E.P., Draper, N.R.: Empirical Model-Building and Response Surface. Wiley, New York (1986)"},{"key":"7376_CR8","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441","volume-title":"Convex optimization","author":"S Boyd","year":"2004","unstructured":"Boyd, S., Vandenberghe, L.: Convex Optimization. Cambridge University Press, Cambridge (2004)"},{"key":"7376_CR9","unstructured":"Casella, G., Berger, R.L.: Statistical Inference. Duxbury Advanced Series in Statistics and Decision Sciences. Thomson Learning, Pacific Grove (2002)"},{"key":"7376_CR10","unstructured":"Chen, X.: A New Generalization of Chebyshev Inequality for Random Vectors. arXiv e-prints (2007)"},{"issue":"1","key":"7376_CR11","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1080\/15427951.2006.10129115","volume":"3","author":"F Chung","year":"2006","unstructured":"Chung, F., Lu, L.: Concentration inequalities and martingale inequalities: a survey. Internet Math. 3(1), 79\u2013127 (2006)","journal-title":"Internet Mathematics"},{"key":"7376_CR12","doi-asserted-by":"publisher","unstructured":"Cormode, G.: Data sketching. Commun. ACM 60(9), 48\u201355 (2017). https:\/\/doi.org\/10.1145\/3080008","DOI":"10.1145\/3080008"},{"issue":"3","key":"7376_CR13","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1214\/ss\/1032280214","volume":"11","author":"TJ DiCiccio","year":"1996","unstructured":"DiCiccio, T.J., Efron, B.: Bootstrap confidence intervals. Stat. Sci. 11(3), 189\u2013228 (1996). https:\/\/doi.org\/10.1214\/ss\/1032280214","journal-title":"Statist. Sci."},{"key":"7376_CR14","doi-asserted-by":"publisher","unstructured":"Ding, B., Huang, S., Chaudhuri, S., Chakrabarti, K., Wang, C.: Sample + seek: approximating aggregates with distribution precision guarantee. In: Proceedings of the 2016 International Conference on Management of Data, SIGMOD Conference 2016, San Francisco, CA, USA, 26 June\u201301 July 2016, pp. 679\u2013694 (2016). https:\/\/doi.org\/10.1145\/2882903.2915249","DOI":"10.1145\/2882903.2915249"},{"key":"7376_CR15","unstructured":"Erlandson, E.: Faster random samples with gap sampling (2014). http:\/\/erikerlandson.github.io\/blog\/2014\/09\/11\/faster-random-samples-with-gap-sampling\/"},{"key":"7376_CR16","doi-asserted-by":"publisher","unstructured":"Gryz, J., Guo, J., Liu, L., Zuzarte, C.: Query sampling in DB2 universal database. In: Proceedings of the ACM SIGMOD International Conference on Management of Data, Paris, France, 13\u201318 June 2004, pp. 839\u2013843 (2004). https:\/\/doi.org\/10.1145\/1007568.1007664","DOI":"10.1145\/1007568.1007664"},{"key":"7376_CR17","unstructured":"Hall, P.: The Bootstrap and Edgeworth Expansion. Springer Series in Statistics. Springer, New York (1997)"},{"key":"7376_CR18","doi-asserted-by":"publisher","unstructured":"Hellerstein, J.M., Haas, P.J., Wang, H.J.: Online aggregation. In: SIGMOD 1997, Proceedings ACM SIGMOD International Conference on Management of Data, 13\u201315 May 1997, Tucson, AZ, USA., pp. 171\u2013182 (1997). https:\/\/doi.org\/10.1145\/253260.253291","DOI":"10.1145\/253260.253291"},{"issue":"3","key":"7376_CR19","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1214\/aoms\/1177730196","volume":"19","author":"W Hoeffding","year":"1948","unstructured":"Hoeffding, W.: A class of statistics with asymptotically normal distribution. Ann. Math. Stat. 19(3), 293\u2013325 (1948). https:\/\/doi.org\/10.1214\/aoms\/1177730196","journal-title":"Ann. Math. Statist."},{"key":"7376_CR20","doi-asserted-by":"crossref","unstructured":"Hoeffding, W.: Probability inequalities for sums of bounded random variables. J. Am. Stat. Assoc. 58(301), 13\u201330 (1963)","DOI":"10.1080\/01621459.1963.10500830"},{"issue":"1","key":"7376_CR21","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1214\/aoms\/1177703732","volume":"35","author":"PJ Huber","year":"1964","unstructured":"Huber, P.J.: Robust estimation of a location parameter. Ann. Math. Stat. 35(1), 73\u2013101 (1964). https:\/\/doi.org\/10.1214\/aoms\/1177703732","journal-title":"Ann. Math. Statist."},{"key":"7376_CR22","unstructured":"Inc., S.: Sample selection (2017). https:\/\/snappydatainc.github.io\/snappydata\/sde\/sample_selection\/"},{"key":"7376_CR23","unstructured":"Kerrisk, M.: The Linux Programming Interface. No Starch Press Series. No Starch Press, San Francisco (2010)"},{"key":"7376_CR24","doi-asserted-by":"crossref","unstructured":"Kim, A., Blais, E., Parameswaran, A.G., Indyk, P., Madden, S., Rubinfeld, R.: Rapid sampling for visualizations with ordering guarantees. PVLDB 8(5), 521\u2013532 (2015). http:\/\/www.vldb.org\/pvldb\/vol8\/p521-kim.pdf","DOI":"10.14778\/2735479.2735485"},{"key":"7376_CR25","unstructured":"Kreyszig, E.: Introductory Functional Analysis with Applications. Wiley Classics Library. Wiley, New York (1989)"},{"key":"7376_CR26","doi-asserted-by":"publisher","unstructured":"Krishnan, S., Wang, J., Franklin, M.J., Goldberg, K., Kraska, T.: Privateclean: Data cleaning and differential privacy. In: Proceedings of the 2016 International Conference on Management of Data, SIGMOD Conference 2016, San Francisco, CA, USA, 26 June\u201301 July 2016, pp. 937\u2013951 (2016). https:\/\/doi.org\/10.1145\/2882903.2915248","DOI":"10.1145\/2882903.2915248"},{"key":"7376_CR27","doi-asserted-by":"publisher","unstructured":"Li, F., Wu, B., Yi, K., Zhao, Z.: Wander join: Online aggregation via random walks. In: Proceedings of the 2016 International Conference on Management of Data, SIGMOD Conference 2016, San Francisco, CA, USA, 26 June\u201301 July 2016, pp. 615\u2013629 (2016). https:\/\/doi.org\/10.1145\/2882903.2915235","DOI":"10.1145\/2882903.2915235"},{"key":"7376_CR28","unstructured":"Lohr, S.L.: Sampling: Design and Analysis. Advanced (Cengage Learning). Brooks\/Cole, Boston (2009). https:\/\/books.google.com\/books?id=aSXKXbyNlMQC"},{"key":"7376_CR29","doi-asserted-by":"publisher","unstructured":"Mozafari, B.: Approximate query engines: Commercial challenges and research opportunities. In: Proceedings of the 2017 ACM International Conference on Management of Data, SIGMOD Conference 2017, Chicago, IL, USA, 14\u201319 May 2017, pp. 521\u2013524 (2017). https:\/\/doi.org\/10.1145\/3035918.3056098","DOI":"10.1145\/3035918.3056098"},{"key":"7376_CR30","unstructured":"Mozafari, B., Niu, N.: A handbook for building an approximate query engine. IEEE Data Eng. Bull. 38(3), 3\u201329 (2015)"},{"key":"7376_CR31","unstructured":"Mozafari, B., Ramnarayan, J., Menon, S., Mahajan, Y., Chakraborty, S., Bhanawat, H., Bachhav, K.: Snappydata: A unified cluster for streaming, transactions and interactice analytics. In: CIDR 2017, 8th Biennial Conference on Innovative Data Systems Research, Chaminade, CA, USA, 8\u201311 January 2017, Online Proceedings (2017). http:\/\/cidrdb.org\/cidr2017\/papers\/p28-mozafari-cidr17.pdf"},{"key":"7376_CR32","unstructured":"Nocedal, J., Wright, S.: Numerical Optimization. Springer Series in Operations Research and Financial Engineering. Springer, New York (2006)"},{"key":"7376_CR33","unstructured":"van der Vaart, A.W.: Asymptotic Statistics. Cambridge Series in Statistical and Probabilistic Mathematics. Cambridge University Press, Cambridge (2000)"},{"key":"7376_CR34","doi-asserted-by":"publisher","unstructured":"Wang, J., Krishnan, S., Franklin, M.J., Goldberg, K., Kraska, T., Milo, T.: A sample-and-clean framework for fast and accurate query processing on dirty data. In: International Conference on Management of Data, SIGMOD 2014, Snowbird, UT, USA, 22\u201327 June 2014, pp. 469\u2013480 (2014). https:\/\/doi.org\/10.1145\/2588555.2610505","DOI":"10.1145\/2588555.2610505"},{"key":"7376_CR35","doi-asserted-by":"crossref","unstructured":"Wang, J., Lin, C., He, R., Chae, M., Papakonstantinou, Y., Swanson, S.: MILC: inverted list compression in memory. PVLDB 10(8), 853\u2013864 (2017)","DOI":"10.14778\/3090163.3090164"},{"key":"7376_CR36","doi-asserted-by":"crossref","unstructured":"Wasserman, L.: All of Statistics: A Concise Course in Statistical Inference. Springer Texts in Statistics. Springer, New York (2004)","DOI":"10.1007\/978-0-387-21736-9"},{"key":"7376_CR37","unstructured":"Wasserman, L.: All of Nonparametric Statistics. Springer Texts in Statistics. Springer, New York (2006)"},{"key":"7376_CR38","unstructured":"Zaharia, M., Chowdhury, M., Das, T., Dave, A., Ma, J., McCauly, M., Franklin, M.J., Shenker, S., Stoica, I.: Resilient distributed datasets: A fault-tolerant abstraction for in-memory cluster computing. In: Proceedings of the 9th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2012, San Jose, CA, USA, 25\u201327 April 2012, pp. 15\u201328 (2012). https:\/\/www.usenix.org\/conference\/nsdi12\/technical-sessions\/presentation\/zaharia"},{"key":"7376_CR39","doi-asserted-by":"publisher","unstructured":"Zeng, K., Agarwal, S., Stoica, I.: iolap: Managing uncertainty for efficient incremental OLAP. In: Proceedings of the 2016 International Conference on Management of Data, SIGMOD Conference 2016, San Francisco, CA, USA, 26 June\u201301 July 2016, pp. 1347\u20131361 (2016). https:\/\/doi.org\/10.1145\/2882903.2915240","DOI":"10.1145\/2882903.2915240"},{"key":"7376_CR40","doi-asserted-by":"publisher","unstructured":"Zeng, K., Gao, S., Mozafari, B., Zaniolo, C.: The analytical bootstrap: a new method for fast error estimation in approximate query processing. In: International Conference on Management of Data, SIGMOD 2014, Snowbird, UT, USA, June 22-27, 2014, pp. 277\u2013288 (2014). https:\/\/doi.org\/10.1145\/2588555.2588579","DOI":"10.1145\/2588555.2588579"}],"container-title":["Distributed and Parallel Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10619-021-07376-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10619-021-07376-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10619-021-07376-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,3,3]],"date-time":"2022-03-03T08:10:35Z","timestamp":1646295035000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10619-021-07376-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,21]]},"references-count":40,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["7376"],"URL":"https:\/\/doi.org\/10.1007\/s10619-021-07376-5","relation":{},"ISSN":["0926-8782","1573-7578"],"issn-type":[{"type":"print","value":"0926-8782"},{"type":"electronic","value":"1573-7578"}],"subject":[],"published":{"date-parts":[[2021,10,21]]},"assertion":[{"value":"15 September 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 October 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}