{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T16:07:21Z","timestamp":1768925241385,"version":"3.49.0"},"reference-count":116,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Key Research and Development Plan","award":["2019YFB1705301"],"award-info":[{"award-number":["2019YFB1705301"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61572272"],"award-info":[{"award-number":["61572272"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["71690231"],"award-info":[{"award-number":["71690231"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/access.2020.2988120","type":"journal-article","created":{"date-parts":[[2020,4,15]],"date-time":"2020-04-15T21:38:43Z","timestamp":1586986723000},"page":"72713-72726","source":"Crossref","is-referenced-by-count":31,"title":["Sampling for Big Data Profiling: A Survey"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4265-0186","authenticated-orcid":false,"given":"Zhicheng","family":"Liu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4059-6913","authenticated-orcid":false,"given":"Aoqian","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3147.3165"},{"key":"ref38","first-page":"13","article-title":"An application of oversampling, undersampling, bagging and boosting in handling imbalanced datasets","author":"yap","year":"2013","journal-title":"Proc 1st Int Conf Adv Data Inf Eng"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/S0096-3003(03)00803-8"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.01.026"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2857218.2857256"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2014.2327238"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150479"},{"key":"ref36","first-page":"253","article-title":"Cluster sampling to assess immunization coverage: A review of experience with a simplified sampling method","volume":"60","author":"henderson","year":"1982","journal-title":"Bull World Health Org"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1111\/j.1365-2818.1987.tb02837.x"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176346500"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1023\/A:1006316418865"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3054772"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1158\/1055-9965.EPI-18-0797"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2966553"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.14778\/2732967.2732974"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2915232"},{"key":"ref24","first-page":"3","article-title":"Dataspaces: Co-existence with heterogeneity","author":"maier","year":"2006","journal-title":"Proc 10th Int Conf Princ Knowl Represent Reasoning Lake District United Kingdom"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-017-0466-5"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.14778\/2752939.2752946"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-012-0285-7"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1007\/s10844-007-0048-x"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2011.5767857"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1214\/18-AOAS1161SF"},{"key":"ref51","first-page":"311","article-title":"Sampling-based estimation of the number of distinct values of an attribute","author":"peter haas","year":"1995","journal-title":"Proc 21th Int Conf Very Large Data Bases VLDB 95"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2003.1232271"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-015-0345-6"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/1989323.1989401"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/304182.304204"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/581751.581753"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/276304.276343"},{"key":"ref53","first-page":"541","article-title":"Distinct sampling for highly-accurate answers to distinct values queries and event reports","author":"phillip gibbons","year":"2001","journal-title":"Proceedings of the International Conference on Very Large Data Bases VLDB"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/335168.335230"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.fss.2014.01.016"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/775047.775114"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1016\/j.procs.2015.04.188","article-title":"A brief introduction on big data 5 Vs characteristics and Hadoop technology","volume":"48","author":"anuradha","year":"2015","journal-title":"Procedia Comput Sci"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2013.109"},{"key":"ref5","first-page":"15","article-title":"Mining big data in real time","volume":"37","author":"bifet","year":"2013","journal-title":"Informatica"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1002\/1944-2866.POI328"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1093\/poq\/36.3.407"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.26599\/BDMA.2019.9020015"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3010089.3010113"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1017\/S1049096514001796"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1111\/j.1740-9713.2014.00778.x"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.3233\/SJI-170395"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1111\/insr.12290"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-015-0030-3"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/BigDataCongress.2017.53"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/312129.312188"},{"key":"ref43","first-page":"367","article-title":"Static versus dynamic sampling for data mining","author":"george john","year":"1996","journal-title":"Proc 1st Intl Conf on Knowledge Discovery and Data Mining (KDD)"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2014.08.007"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63962-8_8-1"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/BigData47090.2019.9006232"},{"key":"ref70","first-page":"207","article-title":"Fast approximate discovery of inclusion dependencies","author":"kruse","year":"2017","journal-title":"Datenbanksysteme f&#x00FC;r Business Technologie und Web"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1145\/582095.582099"},{"key":"ref77","author":"pohl","year":"1969","journal-title":"A Minimum Storage Algorithm for Computing the Median"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3186728.3164145"},{"key":"ref75","article-title":"The Optim. Queries relational databases","author":"kooi","year":"1980"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2908452"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783317"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.14778\/2733004.2733022"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.2017.1408468"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/LDAV.2017.8231848"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.14257\/ijseia.2015.9.5.03"},{"key":"ref64","first-page":"691","article-title":"GORDIAN: Efficient and scalable discovery of composite keys","author":"sismanis","year":"2006","journal-title":"Proc 32nd Int Conf Very Large Data Bases"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/2063576.2063801"},{"key":"ref66","first-page":"1803","article-title":"Approximate discovery of functional dependencies for large datasets","author":"bleifu\u00df","year":"2016","journal-title":"Proc ACM CIKM Int Conf Inf and Knowledge Management"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2915203"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/1007568.1007641"},{"key":"ref2","author":"manyika","year":"2011","journal-title":"Big Data The Next Frontier for Innovation Competition and Productivity"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.26599\/BDMA.2019.9020019"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1007\/s11036-013-0489-0","article-title":"Big data: A survey","volume":"19","author":"chen","year":"2014","journal-title":"Mobile Netw Appl"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196916"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-56039-4_34"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.14778\/2824032.2824056"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1145\/182591.182601"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2015.7113270"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.14778\/2732240.2732248"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2016.2594785"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2013.84"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.14778\/2536206.2536212"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2012.46"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.14778\/1687627.1687693"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2010.154"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.14778\/2977797.2977798"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2588570"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1016\/j.datak.2013.06.003"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.14778\/1687627.1687674"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646135"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1145\/1366102.1366103"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45876-X_30"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2010.197"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2007.367920"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/RIDE.1997.583696"},{"key":"ref11","author":"pyle","year":"1999","journal-title":"Data Preparation for Data Mining"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-015-0389-y"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.14778\/2809974.2809989"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2882955"},{"key":"ref15","first-page":"1","article-title":"Sampling techniques & determination of sample size in applied statistics research: An overview","volume":"2","author":"singh","year":"2014","journal-title":"International Journal of Economics Commerce and Management"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2590989.2590995"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/PDIS.1996.568665"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2016.7498363"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/69.553164"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2723372.2723730"},{"key":"ref84","first-page":"223","article-title":"Sampling: Design and analysis","volume":"42","author":"explained","year":"2009","journal-title":"Technometrics"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2915233"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-09156-3_49"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.14778\/2732977.2732991"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-010-0206-6"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-19548-3_1"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2019.00023"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1145\/3070647"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807202"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1198\/tech.2005.s299"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1145\/2000824.2000826"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1016\/S0306-4379(01)00027-8"},{"key":"ref88","first-page":"342","article-title":"Data-driven schema normalization","author":"papenbrock","year":"2017","journal-title":"Proc Intl Conf on Extending Database Technology EDBT 2010"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8948470\/09068262.pdf?arnumber=9068262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,17]],"date-time":"2021-12-17T19:51:26Z","timestamp":1639770686000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9068262\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":116,"URL":"https:\/\/doi.org\/10.1109\/access.2020.2988120","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}