{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:16:40Z","timestamp":1750220200964,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61972401,61932001"],"award-info":[{"award-number":["61972401,61932001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Outstanding Young Scientist Program","award":["BJJWZYJH012019100020098"],"award-info":[{"award-number":["BJJWZYJH012019100020098"]}]},{"name":"Beijing Natural Science Foundation","award":["4222028"],"award-info":[{"award-number":["4222028"]}]},{"name":"major key project of PCL","award":["PCL2021A12"],"award-info":[{"award-number":["PCL2021A12"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539390","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:12Z","timestamp":1660331172000},"page":"893-903","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Sampling-based Estimation of the Number of Distinct Values in Distributed Environment"],"prefix":"10.1145","author":[{"given":"Jiajun","family":"Li","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"given":"Zhewei","family":"Wei","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"given":"Bolin","family":"Ding","sequence":"additional","affiliation":[{"name":"Alibaba Group, Bellevue, WA, USA"}]},{"given":"Xiening","family":"Dai","sequence":"additional","affiliation":[{"name":"Alibaba Group, Bellevue, WA, USA"}]},{"given":"Lu","family":"Lu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"given":"Jingren","family":"Zhou","sequence":"additional","affiliation":[{"name":"Alibaba Group, Bellevue, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1006\/jcss.1997.1545"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/646978.711822"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1198\/106186002760180572"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1993.10594330"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1976.10480344"},{"key":"e_1_3_2_2_6_1","volume-title":"Nonparametric estimation of the number of classes in a population. Scandinavian Journal of statistics","author":"Chao Anne","year":"1984","unstructured":"Anne Chao. 1984. Nonparametric estimation of the number of classes in a population. Scandinavian Journal of statistics (1984), 265--270."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1992.10475194"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/335168.335230"},{"volume-title":"International Colloquium on Automata, Languages, and Programming","author":"Charikar Moses","key":"e_1_3_2_2_9_1","unstructured":"Moses Charikar, Kevin Chen, and Martin Farach-Colton. 2002. Finding frequent items in data streams. In International Colloquium on Automata, Languages, and Programming. Springer, 693--703."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2019.2940705"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/982792.982815"},{"volume-title":"Small Summaries for Big Data","author":"Cormode Graham","key":"e_1_3_2_2_12_1","unstructured":"Graham Cormode and Ke Yi. 2020. Small Summaries for Big Data. Cambridge University Press."},{"key":"e_1_3_2_2_13_1","volume-title":"Extensive large-scale study of error in samping-based distinct value estimators for databases. arXiv preprint arXiv:1612.00476","author":"Deolalikar Vinay","year":"2016","unstructured":"Vinay Deolalikar and Hernan Laffitte. 2016. Extensive large-scale study of error in samping-based distinct value estimators for databases. arXiv preprint arXiv:1612.00476 (2016)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Philippe Flajolet \u00c9ric Fusy Olivier Gandouet and Fr\u00e9d\u00e9ric Meunier. 2007. Hyperloglog: the analysis of a near-optimal cardinality estimation algorithm. In Discrete Mathematics and Theoretical Computer Science. 137--156.","DOI":"10.46298\/dmtcs.3545"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/SFCS.1983.46"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/40.3-4.237"},{"volume-title":"The Generalized Jackknife Statistic","author":"Gray HL","key":"e_1_3_2_2_17_1","unstructured":"HL Gray and RR Schucany. 1972. The Generalized Jackknife Statistic, New York: Marcel Decker."},{"key":"e_1_3_2_2_18_1","first-page":"311","article-title":"Sampling-based estimation of the number of distinct values of an attribute","volume":"95","author":"Haas Peter J","year":"1995","unstructured":"Peter J Haas, Jeffrey F Naughton, S Seshadri, and Lynne Stokes. 1995. Sampling-based estimation of the number of distinct values of an attribute. In VLDB, Vol. 95. 311--322.","journal-title":"VLDB"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1998.10473807"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2452376.2452456"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/308386.308455"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/67544.66933"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1147954.1147955"},{"volume-title":"Advances in Computers.","author":"Kushilevitz Eyal","key":"e_1_3_2_2_24_1","unstructured":"Eyal Kushilevitz. 1997. Communication complexity. In Advances in Computers. Vol. 44. Elsevier, 331--360."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2011.02.002"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/867576"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972962.7"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-53507-1_98"},{"volume-title":"SUM, and AVERAGE relational algebra queries. In Database and Expert Systems Applications","author":"Ozsoyoglu Gultekin","key":"e_1_3_2_2_29_1","unstructured":"Gultekin Ozsoyoglu, Kaizheng Du, A Tjahjana, W-C Hou, and DY Rowland. 1991. On estimating COUNT, SUM, and AVERAGE relational algebra queries. In Database and Expert Systems Applications. Springer, 406--412."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-10424-4_17"},{"key":"e_1_3_2_2_31_1","volume-title":"Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining. 440--449","author":"Ping Li","year":"2007","unstructured":"Li Ping. 2007. Very sparse stable random projections for dimension reduction in l alpha (0 alpha= 2) norm. In Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining. 440--449."},{"key":"e_1_3_2_2_32_1","first-page":"97","article-title":"On estimation of the size of the dictionary of a long text on the basis of a sample","volume":"19","author":"Shlosser A","year":"1981","unstructured":"A Shlosser. 1981. On estimation of the size of the dictionary of a long text on the basis of a sample. Engineering Cybernetics, Vol. 19, 1 (1981), 97--102.","journal-title":"Engineering Cybernetics"},{"key":"e_1_3_2_2_33_1","volume-title":"Deterministic communication complexity of set intersection. Discrete applied mathematics","author":"Tamm Ulrich","year":"1995","unstructured":"Ulrich Tamm. 1995. Deterministic communication complexity of set intersection. Discrete applied mathematics, Vol. 61, 3 (1995), 271--283."},{"key":"e_1_3_2_2_34_1","unstructured":"Paul Valiant and Gregory Valiant. 2013. Estimating the Unseen: Improved Estimators for Entropy and other Properties.. In NIPS. 2157--2165."},{"key":"e_1_3_2_2_35_1","volume-title":"Proceedings of the eleventh annual ACM symposium on Theory of computing. 209--213","author":"Chi-Chih Yao Andrew","year":"1979","unstructured":"Andrew Chi-Chih Yao. 1979. Some complexity questions related to distributive computing (preliminary report). In Proceedings of the eleventh annual ACM symposium on Theory of computing. 209--213."}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Washington DC USA","acronym":"KDD '22"},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539390","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539390","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:48Z","timestamp":1750186968000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539390"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":35,"alternative-id":["10.1145\/3534678.3539390","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539390","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}