{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T19:47:07Z","timestamp":1743018427904,"version":"3.40.3"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030676575"},{"type":"electronic","value":"9783030676582"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-67658-2_27","type":"book-chapter","created":{"date-parts":[[2021,2,24]],"date-time":"2021-02-24T12:03:33Z","timestamp":1614168213000},"page":"473-489","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["LOAD: LSH-Based $$\\ell _0$$-Sampling over Stream Data with Near-Duplicates"],"prefix":"10.1007","author":[{"given":"Dingzhu","family":"Lurong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanlong","family":"Wen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiangwei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaojie","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,2,25]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Chen, J., Zhang, Q.: Distinct sampling on streaming data with near-duplicates. In: Proceedings of the 37th ACM SIGMOD-SIGACT-SIGAI Symposium on Principles of Database Systems, pp. 369\u2013382. ACM (2018)","DOI":"10.1145\/3196959.3196978"},{"key":"27_CR2","doi-asserted-by":"crossref","unstructured":"Chen, D., Zhang, Q.: Streaming algorithms for robust distinct elements. In: Proceedings of the 2016 International Conference on Management of Data, pp. 1433\u20131447. ACM (2016)","DOI":"10.1145\/2882903.2882915"},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Indyk, P., Motwani, R.: Approximate nearest neighbors: towards removing the curse of dimensionality. In: Proceedings of the Thirtieth Annual ACM Symposium on Theory of Computing, pp. 604\u2013613. ACM (1998)","DOI":"10.1145\/276698.276876"},{"issue":"9","key":"27_CR4","doi-asserted-by":"publisher","first-page":"2604","DOI":"10.1109\/JPROC.2012.2193849","volume":"100","author":"M Slaney","year":"2012","unstructured":"Slaney, M., He, J., Lifshits, Y.: Optimal parameters for locality-sensitive hashing. Proc. IEEE 100(9), 2604\u20132623 (2012)","journal-title":"Proc. IEEE"},{"key":"27_CR5","unstructured":"Krizhevsky, A.: Learning multiple layers of features from tiny images. Technical report (2009)"},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Mukherjee, S., Asnani, H., Lin, E., Kannan, S.: Clustergan: latent space clustering in generative adversarial networks. In: Proceedings of the AAAI Conference on Artificial Intelligence 33, 4610\u20134617 (2019)","DOI":"10.1609\/aaai.v33i01.33014610"},{"issue":"3","key":"27_CR7","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1007\/s10619-013-7131-9","volume":"32","author":"G Cormode","year":"2014","unstructured":"Cormode, G., Firmani, D.: A unifying framework for l0-sampling algorithms. Distrib. Parallel Databases 32(3), 315\u2013335 (2014)","journal-title":"Distrib. Parallel Databases"},{"issue":"1","key":"27_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TKDE.2007.250581","volume":"19","author":"AK Elmagarmid","year":"2006","unstructured":"Elmagarmid, A.K., Ipeirotis, P.G., Verykios, V.S.: Duplicate record detection: a survey. IEEE Trans. Knowl. Data Engineering 19(1), 1\u201316 (2006)","journal-title":"IEEE Trans. Knowl. Data Engineering"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Frahling, G., Indyk, P., Sohler, C.: Sampling in dynamic data streams and applications. In: Symposium on Computational Geometry (2005)","DOI":"10.1145\/1064092.1064116"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Gibbons, P.B., Tirthapura., S.: Estimating simple functions on the union of data streams. In: Proceedings of the Thirteenth Annual ACM Symposium on Parallel Algorithms and Architectures, pp. 281\u2013291. ACM (2001)","DOI":"10.1145\/378580.378687"},{"key":"27_CR11","unstructured":"Babcock, B., Datar, M., Motwani, R.: Sampling from a moving window over streaming data. In: Proceedings of the Thirteenth Annual ACM-SIAM Symposium on Discrete Algorithms, pp. 633\u2013634. Society for Industrial and Applied Mathematics (2002)"},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Chung, Y.-Y., Tirthapura, S.: Distinct random sampling from a distributed stream. In: 2015 IEEE International Parallel and Distributed Processing Symposium, pp. 532\u2013541. IEEE (2015)","DOI":"10.1109\/IPDPS.2015.97"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Ba, K.D., Indyk, P., Price, E., Woodruff, D.P.: Lower bounds for sparse recovery. In: Proceedings of the Twenty-First Annual ACM-SIAM Symposium on Discrete Algorithms, pp. 1190\u20131197. SIAM (2010)","DOI":"10.1137\/1.9781611973075.95"},{"key":"27_CR14","doi-asserted-by":"crossref","unstructured":"Jowhari, H., Sa\u011flam, M., Tardos, G.: Tight bounds for LP samplers, finding duplicates in streams, and related problems. In: Proceedings of the thirtieth ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems, pp. 49\u201358. ACM (2011)","DOI":"10.1145\/1989284.1989289"},{"issue":"2","key":"27_CR15","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1016\/0022-0000(85)90041-8","volume":"31","author":"P Flajolet","year":"1985","unstructured":"Flajolet, P., Martin, G.N.: Probabilistic counting algorithms for data base applications. J. Comput. Syst. Sci. 31(2), 182\u2013209 (1985)","journal-title":"J. Comput. Syst. Sci."},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Beyer, K., Haas, P.J., Reinwald, B., Sismanis, Y., Gemulla, R.: On synopses for distinct-value estimation under multiset operations. In: Proceedings of the 2007 ACM SIGMOD International Conference on Management of Data, pp. 199\u2013210. ACM (2007)","DOI":"10.1145\/1247480.1247504"},{"issue":"3","key":"27_CR17","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1016\/j.tcs.2007.02.031","volume":"378","author":"S Ganguly","year":"2007","unstructured":"Ganguly, S.: Counting distinct items over update streams. Theoret. Comput. Sci. 378(3), 211\u2013222 (2007)","journal-title":"Theoret. Comput. Sci."},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Kane, D.M., Nelson, J., Woodruff, D.P.: An optimal algorithm for the distinct elements problem. In: Proceedings of the Twenty-Ninth ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems, pp. 41\u201352. ACM (2010)","DOI":"10.1145\/1807085.1807094"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Zhang, Q.: Communication-efficient computation on distributed noisy datasets. In: Proceedings of the 27th ACM Symposium on Parallelism in Algorithms and Architectures, pp. 313\u2013322. ACM (2015)","DOI":"10.1145\/2755573.2755575"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-67658-2_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,23]],"date-time":"2025-02-23T23:07:23Z","timestamp":1740352043000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-67658-2_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030676575","9783030676582"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-67658-2_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"25 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ghent","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Belgium","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd2020.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"945","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"195","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4,5","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4,4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference took place virtually due to the COVID-19 pandemic","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}