{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,18]],"date-time":"2026-01-18T07:40:36Z","timestamp":1768722036002,"version":"3.49.0"},"reference-count":25,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s11704-023-2356-x","type":"journal-article","created":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T08:01:27Z","timestamp":1702713687000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Density estimation-based method to determine sample size for random sample partition of big data"],"prefix":"10.1007","volume":"18","author":[{"given":"Yulin","family":"He","sequence":"first","affiliation":[]},{"given":"Jiaqi","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Jiaxing","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Philippe","family":"Fournier-Viger","sequence":"additional","affiliation":[]},{"given":"Joshua Zhexue","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,16]]},"reference":[{"issue":"5","key":"2356_CR1","doi-asserted-by":"publisher","first-page":"999","DOI":"10.1109\/TPDS.2017.2784423","volume":"29","author":"M Sookhak","year":"2018","unstructured":"Sookhak M, Yu F R, Zomaya A Y. Auditing big data storage in cloud computing using divide and conquer tables. IEEE Transactions on Parallel and Distributed Systems, 2018, 29(5): 999\u20131012","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"issue":"2","key":"2356_CR2","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1002\/cpe.3436","volume":"28","author":"S Y Zhao","year":"2016","unstructured":"Zhao S Y, Li R X, Tian W L, Xiao W J, Dong X H, Liao D J, Khan S U, Li K Q. Divide-and-conquer approach for solving singular value decomposition based on MapReduce. Concurrency and Computation: Practice and Experience, 2016, 28(2): 331\u2013350","journal-title":"Concurrency and Computation: Practice and Experience"},{"key":"2356_CR3","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1016\/j.procs.2015.04.108","volume":"48","author":"M R Ghazi","year":"2015","unstructured":"Ghazi M R, Gangodkar D. Hadoop, MapReduce and HDFS: a developers perspective. Procedia Computer Science, 2015, 48: 45\u201350","journal-title":"Procedia Computer Science"},{"key":"2356_CR4","doi-asserted-by":"crossref","unstructured":"Neha M P, Narendra M P, Hasan M I, Parth D S, Mayur M P. Improving HDFS write performance using efficient replica placement. In: Proceedings of the 5th International Conference-Confluence the Next Generation Information Technology Summit. 2014, 36\u201339","DOI":"10.1109\/CONFLUENCE.2014.6949234"},{"issue":"11","key":"2356_CR5","doi-asserted-by":"publisher","first-page":"5846","DOI":"10.1109\/TII.2019.2912723","volume":"15","author":"S Salloum","year":"2019","unstructured":"Salloum S, Huang J Z, He Y L. Random sample partition: a distributed data model for big data analysis. IEEE Transactions on Industrial Informatics, 2019, 15(11): 5846\u20135854","journal-title":"IEEE Transactions on Industrial Informatics"},{"key":"2356_CR6","doi-asserted-by":"crossref","unstructured":"Wei C H, Salloum S, Emara T Z, Zhang X L, Huang J Z, He Y L. A two-stage data processing algorithm to generate random sample partitions for big data analysis. In: Proceedings of the 11th International Conference on Cloud Computing. 2018, 347\u2013364","DOI":"10.1007\/978-3-319-94295-7_24"},{"key":"2356_CR7","volume-title":"Statistics: An Introductory Analysis","author":"T Yamane","year":"1967","unstructured":"Yamane T. Statistics: An Introductory Analysis. 2nd ed. New York: Harper and Row, 1967","edition":"2nd ed"},{"key":"2356_CR8","volume-title":"Sampling Techniques","author":"W G Cochran","year":"2007","unstructured":"Cochran W G. Sampling Techniques. New York: John Wiley & Sons, 2007"},{"key":"2356_CR9","volume-title":"Sampling considerations in evaluating cooperative extension programs","author":"M F Smith","year":"1983","unstructured":"Smith M F. Sampling considerations in evaluating cooperative extension programs. Gainesville: Florida Cooperative Extension Service, Institute of Food and Agricultural Sciences, University of Florida, 1983"},{"key":"2356_CR10","doi-asserted-by":"publisher","first-page":"109088","DOI":"10.1016\/j.spl.2021.109088","volume":"173","author":"M Naaman","year":"2021","unstructured":"Naaman M. On the tight constant in the multivariate Dvoretzky-Kiefer-Wolfowitz inequality. Statistics & Probability Letters, 2021, 173: 109088","journal-title":"Statistics & Probability Letters"},{"issue":"4","key":"2356_CR11","doi-asserted-by":"publisher","first-page":"795","DOI":"10.1111\/rssb.12050","volume":"76","author":"A Kleiner","year":"2014","unstructured":"Kleiner A, Talwalkar A, Sarkar P, Jordan M I. A scalable bootstrap for massive data. Journal of the Royal Statistical Society Series B: Statistical Methodology, 2014, 76(4): 795\u2013816","journal-title":"Journal of the Royal Statistical Society Series B: Statistical Methodology"},{"issue":"6062","key":"2356_CR12","doi-asserted-by":"publisher","first-page":"1518","DOI":"10.1126\/science.1205438","volume":"334","author":"D N Reshef","year":"2011","unstructured":"Reshef D N, Reshef Y A, Finucane H K, Grossman S R, McVean G, Turnbaugh P J, Lander E S, Mitzenmacher M, Sabeti P C. Detecting novel associations in large data sets. Science, 2011, 334(6062): 1518\u20131524","journal-title":"Science"},{"issue":"515","key":"2356_CR13","doi-asserted-by":"publisher","first-page":"1222","DOI":"10.1080\/01621459.2015.1080709","volume":"111","author":"S Sengupta","year":"2016","unstructured":"Sengupta S, Volgushev S, Shao X F. A subsampled double bootstrap for massive data. Journal of the American Statistical Association, 2016, 111(515): 1222\u20131232","journal-title":"Journal of the American Statistical Association"},{"issue":"17","key":"2356_CR14","doi-asserted-by":"publisher","first-page":"1933","DOI":"10.1002\/sim.4780141709","volume":"14","author":"R H Browne","year":"1995","unstructured":"Browne R H. On the use of a pilot sample for sample size determination. Statistics in Medicine, 1995, 14(17): 1933\u20131940","journal-title":"Statistics in Medicine"},{"issue":"3","key":"2356_CR15","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1198\/000313001317098149","volume":"55","author":"R V Lenth","year":"2002","unstructured":"Lenth R V. Some practical guidelines for effective sample size determination. The American Statistician, 2002, 55(3): 187\u2013193","journal-title":"The American Statistician"},{"issue":"2","key":"2356_CR16","doi-asserted-by":"publisher","first-page":"51","DOI":"10.13170\/aijst.1.2.127","volume":"1","author":"W M A W Ahmad","year":"2012","unstructured":"Ahmad W M A W, Amin W A A W M, Aleng N A, Mohamed N. Some practical guidelines for effective sample-size determination in observational studies. Aceh International Journal of Science and Technology, 2012, 1(2): 51\u201353","journal-title":"Aceh International Journal of Science and Technology"},{"issue":"4","key":"2356_CR17","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1016\/j.aucc.2012.07.002","volume":"25","author":"E Burmeister","year":"2012","unstructured":"Burmeister E, Aitken L M. Sample size: how many is enough? Australian Critical Care, 2012, 25(4): 271\u2013274","journal-title":"Australian Critical Care"},{"issue":"1","key":"2356_CR18","doi-asserted-by":"publisher","first-page":"13036","DOI":"10.1038\/s41598-019-49539-6","volume":"9","author":"S Okada","year":"2019","unstructured":"Okada S, Ohzeki M, Taguchi S. Efficient partition of integer optimization problems with one-hot encoding. Scientific Reports, 2019, 9(1): 13036","journal-title":"Scientific Reports"},{"issue":"1","key":"2356_CR19","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1109\/TAI.2022.3151724","volume":"4","author":"Y L He","year":"2023","unstructured":"He Y L, Ye X, Huang D F, Fournier-Viger P, Huang J Z. A hybrid method to measure distribution consistency of mixed-attribute datasets. IEEE Transactions on Artificial Intelligence, 2023, 4(1): 182\u2013196","journal-title":"IEEE Transactions on Artificial Intelligence"},{"issue":"3","key":"2356_CR20","doi-asserted-by":"publisher","first-page":"1065","DOI":"10.1214\/aoms\/1177704472","volume":"33","author":"E Parzen","year":"1962","unstructured":"Parzen E. On estimation of a probability density function and mode. The Annals of Mathematical Statistics, 1962, 33(3): 1065\u20131076","journal-title":"The Annals of Mathematical Statistics"},{"key":"2356_CR21","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1016\/j.ins.2019.04.010","volume":"491","author":"J Jiang","year":"2019","unstructured":"Jiang J, He Y L, Dai D X, Huang J Z. A new kernel density estimator based on the minimum entropy of data set. Information Sciences, 2019, 491: 223\u2013231","journal-title":"Information Sciences"},{"issue":"433","key":"2356_CR22","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1080\/01621459.1996.10476701","volume":"91","author":"M C Jones","year":"1996","unstructured":"Jones M C, Marron J S, Sheather S J. A brief survey of bandwidth selection for density estimation. Journal of the American Statistical Association, 1996, 91(433): 401\u2013407","journal-title":"Journal of the American Statistical Association"},{"key":"2356_CR23","doi-asserted-by":"crossref","unstructured":"Perez-Cruz F. Kullback-Leibler divergence estimation of continuous distributions. In: Proceedings of 2008 IEEE International Symposium on Information Theory. 2008, 1666\u20131670","DOI":"10.1109\/ISIT.2008.4595271"},{"key":"2356_CR24","unstructured":"Perez-Cruz F. Estimation of information theoretic measures for continuous random variables. In: Proceedings of the 21st International Conference on Neural Information Processing Systems. 2008, 1257\u20131264"},{"issue":"1","key":"2356_CR25","doi-asserted-by":"publisher","first-page":"111201","DOI":"10.1007\/s11432-022-3538-4","volume":"66","author":"Y Y Yan","year":"2023","unstructured":"Yan Y Y, Cheng D Z, Feng J E, Li H T, Yue J M. Survey on applications of algebraic state space theory of logical systems to finite state machines. Science China Information Sciences, 2023, 66(1): 111201","journal-title":"Science China Information Sciences"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-023-2356-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-023-2356-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-023-2356-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T20:23:39Z","timestamp":1763583819000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-023-2356-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,16]]},"references-count":25,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["2356"],"URL":"https:\/\/doi.org\/10.1007\/s11704-023-2356-x","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12,16]]},"assertion":[{"value":"14 June 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 April 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 December 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"185322"}}