{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:37:02Z","timestamp":1772908622674,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819755004","type":"print"},{"value":"9789819755011","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5501-1_25","type":"book-chapter","created":{"date-parts":[[2024,7,26]],"date-time":"2024-07-26T03:48:02Z","timestamp":1721965682000},"page":"331-342","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["An Effective RSP Data Sampling Algorithm"],"prefix":"10.1007","author":[{"given":"Hanyu","family":"Yang","sequence":"first","affiliation":[]},{"given":"Xiaohui","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Jinglan","family":"Deng","sequence":"additional","affiliation":[]},{"given":"Jianfei","family":"Yin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,27]]},"reference":[{"key":"25_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-14142-8","volume-title":"Data Mining: The Textbook","author":"CC Aggarwal","year":"2015","unstructured":"Aggarwal, C.C., et al.: Data Mining: The Textbook, vol. 1. Springer, Heidelberg (2015). https:\/\/doi.org\/10.1007\/978-3-319-14142-8"},{"key":"25_CR2","doi-asserted-by":"crossref","unstructured":"Bachem, O., Lucic, M., Krause, A.: Scalable k-means clustering via lightweight coresets. In: Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pp. 1119\u20131127 (2018)","DOI":"10.1145\/3219819.3219973"},{"key":"25_CR3","unstructured":"Balcan, M.F.F., Ehrlich, S., Liang, Y.: Distributed $$ k $$-means and $$ k $$-median clustering on general topologies. Adv. Neural Inf. Process. Syst. 26 (2013)"},{"key":"25_CR4","unstructured":"BC, Y.A., HA, D., MR, L.: Estimation of minimal initial sample size in progressive sampling for big data analytics. J. Theor. Appl. Inf. Technol. 101(13) (2023)"},{"key":"25_CR5","unstructured":"Blackard, J.: Covertype. UCI Machine Learning Repository (1998)"},{"key":"25_CR6","unstructured":"Cantini, R., et al.: Block size estimation for data partitioning in HPC applications using machine learning techniques. CoRR arxiv:2211.10819 (2022)"},{"key":"25_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., Cheng, J.Q., Xie, M.: Divide-and-conquer methods for big data analysis. CoRR arxiv:2102.10771 (2021)","DOI":"10.1002\/9781118445112.stat08298"},{"key":"25_CR8","doi-asserted-by":"publisher","unstructured":"Ci, X., Meng, X.: An efficient block sampling strategy for online aggregation in the cloud. In: Dong, X., Yu, X., Li, J., Sun, Y. (eds.) International Conference on Web-Age Information Management, pp. 362\u2013373. Springer, Heidelberg (2015). https:\/\/doi.org\/10.1007\/978-3-319-21042-1_29","DOI":"10.1007\/978-3-319-21042-1_29"},{"key":"25_CR9","unstructured":"Dayimu, A., Simidjievski, N., Demiris, N., Abraham, J.: Sample size determination via learning-type curves. arXiv preprint arXiv:2303.09575 (2023)"},{"key":"25_CR10","doi-asserted-by":"crossref","unstructured":"Feldman, D., Monemizadeh, M., Sohler, C.: A ptas for k-means clustering based on weak coresets. In: Proceedings of the Twenty-Third Annual Symposium on Computational Geometry, pp. 11\u201318 (2007)","DOI":"10.1145\/1247069.1247072"},{"key":"25_CR11","unstructured":"van\u00a0de Geer, S.: Empirical process theory and applications (2000)"},{"key":"25_CR12","doi-asserted-by":"crossref","unstructured":"Har-Peled, S., Mazumdar, S.: On coresets for k-means and k-median clustering. In: Proceedings of the Thirty-Sixth Annual ACM Symposium on Theory of Computing, pp. 291\u2013300 (2004)","DOI":"10.1145\/1007352.1007400"},{"issue":"5","key":"25_CR13","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-023-2356-x","volume":"18","author":"Y He","year":"2024","unstructured":"He, Y., Chen, J., Shen, J., Fournier-Viger, P., Huang, J.Z.: Density estimation-based method to determine sample size for random sample partition of big data. Front. Comput. Sci. 18(5), 185322 (2024)","journal-title":"Front. Comput. Sci."},{"key":"25_CR14","doi-asserted-by":"publisher","unstructured":"Hoeffding, W.: Probability inequalities for sums of bounded random variables. In: Fisher, N.I., Sen, P.K. (eds.) The Collected Works of Wassily Hoeffding, pp. 409\u2013426. Springer, Heidelberg (1994). https:\/\/doi.org\/10.1007\/978-1-4612-0865-5_26","DOI":"10.1007\/978-1-4612-0865-5_26"},{"key":"25_CR15","doi-asserted-by":"crossref","unstructured":"Kumar, A., Sabharwal, Y., Sen, S.: A simple linear time (1+\/spl epsiv\/)-approximation algorithm for k-means clustering in any dimensions. In: 45th Annual IEEE Symposium on Foundations of Computer Science, pp. 454\u2013462. IEEE (2004)","DOI":"10.1109\/FOCS.2004.7"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"Lin, J., Kolcz, A.: Large-scale machine learning at twitter. In: Proceedings of the 2012 ACM SIGMOD International Conference on Management of Data, pp. 793\u2013804 (2012)","DOI":"10.1145\/2213836.2213958"},{"key":"25_CR17","unstructured":"Lucic, M., Bachem, O., Krause, A.: Strong coresets for hard and soft bregman clustering with applications to exponential family mixtures. In: Artificial Intelligence and Statistics, pp.\u00a01\u20139. PMLR (2016)"},{"issue":"1","key":"25_CR18","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/s004540010019","volume":"24","author":"J Matou\u0161ek","year":"2000","unstructured":"Matou\u0161ek, J.: On approximate geometric k-clustering. Disc. Comput. Geom. 24(1), 61\u201384 (2000)","journal-title":"Disc. Comput. Geom."},{"key":"25_CR19","doi-asserted-by":"crossref","unstructured":"Salloum, S., Huang, J.Z.: Rsp-hist: approximate histograms for big data exploration on hadoop clusters. In: 2021 IEEE 28th International Conference on High Performance Computing, Data, and Analytics (HiPC), pp. 412\u2013417. IEEE (2021)","DOI":"10.1109\/HiPC53243.2021.00058"},{"issue":"11","key":"25_CR20","doi-asserted-by":"publisher","first-page":"5846","DOI":"10.1109\/TII.2019.2912723","volume":"15","author":"S Salloum","year":"2019","unstructured":"Salloum, S., Huang, J.Z., He, Y.: Random sample partition: a distributed data model for big data analysis. IEEE Trans. Ind. Inf. 15(11), 5846\u20135854 (2019). https:\/\/doi.org\/10.1109\/TII.2019.2912723","journal-title":"IEEE Trans. Ind. Inf."},{"key":"25_CR21","doi-asserted-by":"publisher","first-page":"3675","DOI":"10.1109\/ACCESS.2018.2889355","volume":"7","author":"S Salloum","year":"2018","unstructured":"Salloum, S., Huang, J.Z., He, Y., Chen, X.: An asymptotic ensemble learning framework for big data analysis. IEEE Access 7, 3675\u20133693 (2018)","journal-title":"IEEE Access"},{"key":"25_CR22","doi-asserted-by":"publisher","unstructured":"Shvachko, K., Kuang, H., Radia, S., Chansler, R.: The hadoop distributed file system. In: 2010 IEEE 26th Symposium on Mass Storage Systems and Technologies (MSST), pp. 1\u201310 (2010). https:\/\/doi.org\/10.1109\/MSST.2010.5496972","DOI":"10.1109\/MSST.2010.5496972"},{"key":"25_CR23","doi-asserted-by":"crossref","unstructured":"Singh, T., Khanna, R., Kumar, M., et\u00a0al.: Multiclass imbalanced big data classification utilizing spark cluster. In: 2021 12th International Conference on Computing Communication and Networking Technologies (ICCCNT), pp.\u00a01\u20137. IEEE (2021)","DOI":"10.1109\/ICCCNT51525.2021.9580029"},{"key":"25_CR24","doi-asserted-by":"publisher","first-page":"89791","DOI":"10.1109\/ACCESS.2023.3307512","volume":"11","author":"AS Tarawneh","year":"2023","unstructured":"Tarawneh, A.S., Alamri, E.S., Al-Saedi, N.N., Alauthman, M., Hassanat, A.B.: CTELC: a constant-time ensemble learning classifier based on KNN for big data. IEEE Access 11, 89791\u201389802 (2023)","journal-title":"IEEE Access"},{"key":"25_CR25","unstructured":"Vidulin, V., Lustrek, M., Kaluza, B., Piltaver, R., Krivec, J.: Localization Data for Person Activity. UCI Machine Learning Repository (2010)"},{"key":"25_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"448","DOI":"10.1007\/978-3-030-60245-1_31","volume-title":"Algorithms and Architectures for Parallel Processing","author":"C Wei","year":"2020","unstructured":"Wei, C., Zhang, J., Valiullin, T., Cao, W., Wang, Q., Long, H.: Distributed and parallel ensemble classification for big data based on Kullback-Leibler random sample partition. In: Qiu, M. (ed.) ICA3PP 2020. LNCS, vol. 12452, pp. 448\u2013464. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-60245-1_31"}],"container-title":["Lecture Notes in Computer Science","Knowledge Science, Engineering and Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5501-1_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T21:29:22Z","timestamp":1732483762000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5501-1_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819755004","9789819755011"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5501-1_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"27 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KSEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Knowledge Science, Engineering and Management","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Birmingham","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ksem2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ai-edge.net\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}