{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T14:43:53Z","timestamp":1743086633417,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031683084"},{"type":"electronic","value":"9783031683091"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-68309-1_10","type":"book-chapter","created":{"date-parts":[[2024,8,17]],"date-time":"2024-08-17T14:02:25Z","timestamp":1723903345000},"page":"124-138","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Random Sampling from\u00a0Very Large Databases"],"prefix":"10.1007","author":[{"given":"Idan","family":"Cohen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aviv","family":"Yehezkel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zohar","family":"Yakhini","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,18]]},"reference":[{"issue":"4","key":"10_CR1","doi-asserted-by":"publisher","first-page":"557","DOI":"10.1007\/s00778-015-0389-y","volume":"24","author":"Z Abedjan","year":"2015","unstructured":"Abedjan, Z., Golab, L., Naumann, F.: Profiling relational data: a survey. VLDB J. 24(4), 557\u2013581 (2015)","journal-title":"VLDB J."},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Agrawal, R., Kadadi, A., Dai, X., Andres, F.: Challenges and opportunities with big data visualization. In: Proceedings of the 7th International Conference on Management of Computational and Collective intElligence in Digital EcoSystems, pp. 169\u2013173 (2015)","DOI":"10.1145\/2857218.2857256"},{"key":"10_CR3","unstructured":"Antoshenkov, G.: Random sampling from pseudo-ranked B+ trees. In: VLDB, pp. 375\u2013382 (1992)"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Ding, B., Kandula, S.: Approximate query processing: no silver bullet. In: Proceedings of the 2017 ACM International Conference on Management of Data, pp. 511\u2013519 (2017)","DOI":"10.1145\/3035918.3056097"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S., Motwani, R., Narasayya, V.: Using random sampling for histogram construction. In: Proceedings of the ACM SIGMOD Conference, pp. 436\u2013447 (1998)","DOI":"10.1145\/276305.276343"},{"issue":"2","key":"10_CR6","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1145\/356770.356776","volume":"11","author":"D Comer","year":"1979","unstructured":"Comer, D.: Ubiquitous B-tree. ACM Comput. Surv. (CSUR) 11(2), 121\u2013137 (1979)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Graefe, G., Kuno, H.: Modern B-tree techniques. In: 2011 IEEE 27th International Conference on Data Engineering, pp. 1370\u20131373. IEEE (2011)","DOI":"10.1109\/ICDE.2011.5767956"},{"issue":"2","key":"10_CR8","first-page":"6","volume":"10","author":"PJ Haas","year":"2003","unstructured":"Haas, P.J.: Speeding up DB2 UDB using sampling. IDUG Solut. J. 10(2), 6 (2003)","journal-title":"IDUG Solut. J."},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"Haas, P.J., Naughton, J.F., Swami, A.N.: On the relative cost of sampling for join selectivity estimation. In: Proceedings of the Thirteenth ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems, pp. 14\u201324 (1994)","DOI":"10.1145\/182591.182594"},{"issue":"2","key":"10_CR10","doi-asserted-by":"publisher","first-page":"278","DOI":"10.1145\/119995.115837","volume":"20","author":"W-C Hou","year":"1991","unstructured":"Hou, W.-C., Ozsoyoglu, G., Dogdu, E.: Error-constrained COUNT query evaluation in relational databases. ACM SIGMOD Rec. 20(2), 278\u2013287 (1991)","journal-title":"ACM SIGMOD Rec."},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Jermaine, C., Pol, A., Arumugam, S.: Online maintenance of very large random samples. In: Proceedings of the 2004 ACM SIGMOD International Conference on Management of Data, pp. 299\u2013310 (2004)","DOI":"10.1145\/1007568.1007603"},{"key":"10_CR12","unstructured":"Kluckhohn, C.: Human behavior and the principle of least effort (1950)"},{"key":"10_CR13","unstructured":"Kudale, A.: B+ tree Preference over B Tree. Chicago, USA (n. d.). http:\/\/www.academia.edu\/11575258\/B_tree_preference_over_B_trees"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Li, F., Wu, B., Yi, K., Zhao, Z.: Wander join: online aggregation via random walks. In: Proceedings of the 2016 International Conference on Management of Data, pp. 615\u2013629 (2016)","DOI":"10.1145\/2882903.2915235"},{"issue":"4","key":"10_CR15","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1007\/s41019-018-0074-4","volume":"3","author":"K Li","year":"2018","unstructured":"Li, K., Li, G.: Approximate query processing: what is new and where to go? Data Sci. Eng. 3(4), 379\u2013397 (2018)","journal-title":"Data Sci. Eng."},{"issue":"1","key":"10_CR16","doi-asserted-by":"publisher","first-page":"195","DOI":"10.1016\/0304-3975(93)90224-H","volume":"116","author":"RJ Lipton","year":"1993","unstructured":"Lipton, R.J., Naughton, J.F., Schneider, D.A., Seshadri, S.: Efficient sampling strategies for relational database operations. Theor. Comput. Sci. 116(1), 195\u2013226 (1993)","journal-title":"Theor. Comput. Sci."},{"issue":"2020","key":"10_CR17","doi-asserted-by":"publisher","first-page":"72713","DOI":"10.1109\/ACCESS.2020.2988120","volume":"8","author":"Z Liu","year":"2020","unstructured":"Liu, Z., Zhang, A.: Sampling for big data profiling: a survey. IEEE Access 8(2020), 72713\u201372726 (2020)","journal-title":"IEEE Access"},{"issue":"4","key":"10_CR18","doi-asserted-by":"publisher","first-page":"359","DOI":"10.3233\/IDA-2002-6405","volume":"6","author":"D Makawita","year":"2002","unstructured":"Makawita, D., Tan, K.-L., Liu, H.: Sampling from databases using B+-trees. Intell. Data Anal. 6(4), 359\u2013377 (2002)","journal-title":"Intell. Data Anal."},{"issue":"1","key":"10_CR19","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.chemolab.2004.03.013","volume":"74","author":"P Minkkinen","year":"2004","unstructured":"Minkkinen, P.: Practical applications of sampling theory. Chemometr. Intell. Lab. Syst. 74(1), 85\u201394 (2004)","journal-title":"Chemometr. Intell. Lab. Syst."},{"key":"10_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"499","DOI":"10.1007\/3-540-53507-1_98","volume-title":"ICDT \u201990","author":"JF Naughton","year":"1990","unstructured":"Naughton, J.F., Seshadri, S.: On estimating the size of projections. In: Abiteboul, S., Kanellakis, P.C. (eds.) ICDT 1990. LNCS, vol. 470, pp. 499\u2013513. Springer, Heidelberg (1990). https:\/\/doi.org\/10.1007\/3-540-53507-1_98"},{"key":"10_CR21","unstructured":"Olken, F.: Random sampling from databases. Ph.D. Dissertation. University of California, Berkeley (1993)"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Olken, F., Rotem, D.: Random sampling from B+ trees. In: Proceedings of the 15th VLDB Conference, Amsterdam, The Netherlands (1989)","DOI":"10.1145\/93597.98746"},{"issue":"1","key":"10_CR23","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1007\/BF00140664","volume":"5","author":"F Olken","year":"1995","unstructured":"Olken, F., Rotem, D.: Random sampling from databases: a survey. Stat. Comput. 5(1), 25\u201342 (1995)","journal-title":"Stat. Comput."},{"issue":"4","key":"10_CR24","first-page":"38","volume":"39","author":"O Papaemmanouil","year":"2016","unstructured":"Papaemmanouil, O., Diao, Y., Dimitriadou, K., Peng, L.: Interactive data exploration via machine learning models. IEEE Data Eng. Bull. 39(4), 38\u201349 (2016)","journal-title":"IEEE Data Eng. Bull."},{"issue":"2","key":"10_CR25","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1145\/971697.602294","volume":"14","author":"G Piatetsky-Shapiro","year":"1984","unstructured":"Piatetsky-Shapiro, G., Connell, C.: Accurate estimation of the number of tuples satisfying a condition. ACM SIGMOD Rec. 14(2), 256\u2013276 (1984)","journal-title":"ACM SIGMOD Rec."},{"key":"10_CR26","unstructured":"Poosala, V.: Zipf\u2019s law (1995). citeseer.ist.psu.edu\/116813.html"},{"key":"10_CR27","unstructured":"Shekelyan, M., Cormode, G., Triantafillou, P., Shanghooshabad, A., Ma, Q.: Weighted random sampling over joins. arXiv preprint arXiv:2201.02670 (2022)"},{"issue":"5","key":"10_CR28","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/MSP.2014.2327238","volume":"31","author":"K Slavakis","year":"2014","unstructured":"Slavakis, K., Giannakis, G.B., Mateos, G.: Modeling and optimization for big data analytics: (statistical) learning tools for our era of data deluge. IEEE Signal Process. Mag. 31(5), 18\u201331 (2014)","journal-title":"IEEE Signal Process. Mag."},{"issue":"1","key":"10_CR29","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1145\/3147.3165","volume":"11","author":"JS Vitter","year":"1985","unstructured":"Vitter, J.S.: Random sampling with a reservoir. ACM Trans. Math. Softw. (TOMS) 11(1), 37\u201357 (1985)","journal-title":"ACM Trans. Math. Softw. (TOMS)"},{"key":"10_CR30","series-title":"Springer Series in Statistics","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1007\/978-1-4612-4380-9_16","volume-title":"Breakthroughs in Statistics","author":"F Wilcoxon","year":"1992","unstructured":"Wilcoxon, F.: Individual comparisons by ranking methods. In: Kotz, S., Johnson, N.L. (eds.) Breakthroughs in Statistics. Springer Series in Statistics, pp. 196\u2013202. Springer, New York (1992). https:\/\/doi.org\/10.1007\/978-1-4612-4380-9_16"},{"issue":"1","key":"10_CR31","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1137\/0209009","volume":"9","author":"C-K Wong","year":"1980","unstructured":"Wong, C.-K., Easton, M.C.: An efficient method for weighted sampling without replacement. SIAM J. Comput. 9(1), 111\u2013113 (1980)","journal-title":"SIAM J. Comput."},{"issue":"1","key":"10_CR32","first-page":"97","volume":"26","author":"X Wu","year":"2013","unstructured":"Wu, X., Zhu, X., Wu, G.-Q., Ding, W.: Data mining with big data. IEEE Trans. Knowl. Data Eng. 26(1), 97\u2013107 (2013)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10_CR33","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Christensen, R., Li, F., Hu, X., Yi, K.: Random sampling over joins revisited. In: Proceedings of the 2018 International Conference on Management of Data, pp. 1525\u20131539 (2018)","DOI":"10.1145\/3183713.3183739"}],"container-title":["Lecture Notes in Computer Science","Database and Expert Systems Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-68309-1_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,17]],"date-time":"2024-08-17T14:03:40Z","timestamp":1723903420000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-68309-1_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031683084","9783031683091"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-68309-1_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"18 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DEXA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database and Expert Systems Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Naples","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"35","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dexa2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.dexa.org\/dexa2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}