{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:17:28Z","timestamp":1750220248170,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":77,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"USA Department of Energy","award":["DE-SC0020200"],"award-info":[{"award-number":["DE-SC0020200"]}]},{"name":"Swiss National Science Foundation","award":["Early Postdoc Mobility scholarship P2ELP2_199749"],"award-info":[{"award-number":["Early Postdoc Mobility scholarship P2ELP2_199749"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,10]]},"DOI":"10.1145\/3514221.3517894","type":"proceedings-article","created":{"date-parts":[[2022,6,12]],"date-time":"2022-06-12T02:33:49Z","timestamp":1655001229000},"page":"1640-1654","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Entropy-Learned Hashing: Constant Time Hashing with Controllable Uniformity"],"prefix":"10.1145","author":[{"given":"Brian","family":"Hentschel","sequence":"first","affiliation":[{"name":"Harvard University, Cambridge, MA, USA"}]},{"given":"Utku","family":"Sirin","sequence":"additional","affiliation":[{"name":"Harvard University, Cambridge, MA, USA"}]},{"given":"Stratos","family":"Idreos","sequence":"additional","affiliation":[{"name":"Harvard University, Cambridge, MA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. gcc libstdc++ hash. https:\/\/github.com\/gcc-mirror\/gcc\/blob\/master\/libstdc%2B%2B-v3\/libsupc%2B%2B\/hash_bytes.cc. Accessed: 2021-05--23."},{"key":"e_1_3_2_1_2_1","unstructured":"2015. Hacker News Posts. https:\/\/www.kaggle.com\/hacker-news\/hacker-news-posts. Accessed: 2021-05--23."},{"volume-title":"Linker Throughput Improvement in Visual Studio","year":"2019","key":"e_1_3_2_1_3_1","unstructured":"2019. Linker Throughput Improvement in Visual Studio 2019. https:\/\/devblogs.microsoft.com\/cppblog\/linker-throughput-improvement-in-visual-studio-2019\/."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2016.2620435"},{"key":"e_1_3_2_1_5_1","first-page":"6","article-title":"Scalable Bloom","volume":"101","author":"Almeida Paulo S\u00e9rgio","year":"2007","unstructured":"Paulo S\u00e9rgio Almeida, Carlos Baquero, Nuno Pregui\u00e7a, and David Hutchison. 2007. Scalable Bloom Filters. Inf. Process. Lett. 101, 6 (March 2007), 255--261.","journal-title":"Filters. Inf. Process. Lett."},{"key":"e_1_3_2_1_6_1","unstructured":"Austin Appleby. [n.d.]. murmurhash3. https:\/\/github.com\/aappleby\/smhasher\/wiki\/MurmurHash3. Accessed: 2021-05--23."},{"key":"e_1_3_2_1_7_1","unstructured":"Austin Appleby. [n.d.]. smhasher suite. https:\/\/github.com\/aappleby\/smhasher. Accessed: 2021-05--23."},{"key":"e_1_3_2_1_8_1","volume-title":"Bernstein","author":"Aumasson Jean-Philippe","year":"2012","unstructured":"Jean-Philippe Aumasson and Daniel J. Bernstein. 2012. SipHash: A Fast Short-Input PRF. In Progress in Cryptology - INDOCRYPT 2012, Steven Galbraith and Mridul Nandi (Eds.). Springer Berlin Heidelberg, Berlin, Heidelberg, 489--508."},{"key":"e_1_3_2_1_9_1","volume-title":"International Conference on Machine Learning. PMLR, 609--618","author":"Balkanski Eric","year":"2021","unstructured":"Eric Balkanski, Sharon Qian, and Yaron Singer. 2021. Instance specific approximations for submodular maximization. In International Conference on Machine Learning. PMLR, 609--618."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.14778\/2732219.2732227"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/362686.362692"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1080\/00031305.1980.10483022","article-title":"Expected Absolute Error of the Usual Estimator of the Binomial Parameter","volume":"34","author":"Blyth Colin R.","year":"1980","unstructured":"Colin R. Blyth. 1980. Expected Absolute Error of the Usual Estimator of the Binomial Parameter. The American Statistician 34, 3 (1980), 155--157. http:\/\/www.jstor.org\/stable\/2683873","journal-title":"The American Statistician"},{"key":"e_1_3_2_1_13_1","first-page":"2649","article-title":"FSST","volume":"13","author":"Boncz Peter","year":"2020","unstructured":"Peter Boncz, Thomas Neumann, and Viktor Leis. 2020. FSST: Fast Random Access String Compression. 13, 12 (2020), 2649--2661.","journal-title":"Fast Random Access String Compression."},{"key":"e_1_3_2_1_14_1","unstructured":"Andrei Broder Michael Mitzenmacher and Andrei Broder I Michael Mitzenmacher. 2002. Network Applications of Bloom Filters: A Survey. In Internet Mathematics. 636--646."},{"key":"e_1_3_2_1_15_1","volume-title":"SEQUENCES","author":"Broder Andrei Z.","year":"1997","unstructured":"Andrei Z. Broder. 1997. On the resemblance and containment of documents.. In SEQUENCES, Bruno Carpentieri, Alfredo De Santis, Ugo Vaccaro, and James A. Storer (Eds.). IEEE, 21--29. http:\/\/dblp.uni-trier.de\/db\/conf\/sequences\/sequences1997.html#Broder97"},{"key":"e_1_3_2_1_16_1","unstructured":"Nathan Bronson and Xiao Shi. [n.d.]. Open-sourcing F14 for faster more memory-efficient hash tables. https:\/\/engineering.fb.com\/2019\/04\/25\/developer-tools\/f14\/."},{"volume-title":"Proceedings of the Ninth Annual ACM Symposium on Theory of Computing","author":"Lawrence Carter J.","key":"e_1_3_2_1_17_1","unstructured":"J. Lawrence Carter and Mark N. Wegman. 1977. Universal Classes of Hash Functions (Extended Abstract). In Proceedings of the Ninth Annual ACM Symposium on Theory of Computing (Boulder, Colorado, USA) (STOC '77). Association for Computing Machinery, New York, NY, USA, 106--112."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.4086\/toc.2013.v009a030"},{"key":"e_1_3_2_1_19_1","unstructured":"Yann Collet. [n.d.]. xxHash. https:\/\/cyan4973.github.io\/xxHash\/. Accessed: 2021-05--23."},{"key":"e_1_3_2_1_20_1","volume-title":"Introduction to Algorithms","author":"Cormen Thomas H.","unstructured":"Thomas H. Cormen, Charles E. Leiserson, Ronald L. Rivest, and Clifford Stein. 2009. Introduction to Algorithms, Third Edition (3rd ed.). The MIT Press.","edition":"3"},{"key":"e_1_3_2_1_21_1","volume-title":"Advances in Neural Information Processing Systems","author":"Dai Zhenwei","year":"2020","unstructured":"Zhenwei Dai and Anshumali Shrivastava. 2020. Adaptive Learned Bloom Filter (Ada-BF): Efficient Utilization of the Classifier with Application to Real-Time Information Filtering on the Web. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M. F. Balcan, and H. Lin (Eds.), Vol. 33. Curran Associates, Inc., 11700--11710. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/86b94dae7c6517ec1ac767fd2c136580-Paper.pdf"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3064054"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3276980"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196927"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457273"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.14778\/3436905.3436919"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1006\/jagm.1997.0873"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.14778\/3389133.3389138"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"D. Eastlake and P. Jones. 2001. RFC3174: US Secure Hash Algorithm 1 (SHA1).","DOI":"10.17487\/rfc3174"},{"key":"e_1_3_2_1_30_1","volume-title":"Hyper-LogLog: the analysis of a near-optimal cardinality estimation algorithm. Discrete Mathematics & Theoretical Computer Science","author":"Flajolet P.","year":"2007","unstructured":"P. Flajolet, \u00c9ric Fusy, Olivier Gandouet, and Fr\u00e9d\u00e9ric Meunier. 2007. Hyper-LogLog: the analysis of a near-optimal cardinality estimation algorithm. Discrete Mathematics & Theoretical Computer Science (2007), 137--156."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/SFCS.1982.39"},{"key":"e_1_3_2_1_32_1","unstructured":"Google. [n.d.]. Abseil Common Libraries. https:\/\/github.com\/abseil\/abseil-cpp."},{"volume-title":"Game engine architecture (1 ed.)","author":"Gregory Jason","key":"e_1_3_2_1_33_1","unstructured":"Jason Gregory. 2009. Game engine architecture (1 ed.). Taylor & Francis Ltd."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3383133"},{"key":"e_1_3_2_1_36_1","unstructured":"Intel. 2021. Intel VTune Amplifier XE Performance Profiler. http:\/\/software.intel.com\/en-us\/articles\/intel-vtune-amplifier-xe\/."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/11841036_42"},{"key":"e_1_3_2_1_38_1","unstructured":"Donald E. Knuth. 1998. The Art of Computer Programming Volume 3: (2nd Ed.) Sorting and Searching. Addison Wesley Longman Publishing Co. Inc. USA."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.43.4.684"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.14778\/2856318.2856321"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196909"},{"key":"e_1_3_2_1_42_1","unstructured":"Matt Kulukundis. [n.d.]. Designing a Fast Efficient Cache-friendly Hash Table Step by Step. https:\/\/www.youtube.com\/watch?v=ncHmEUmJZf4."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.14778\/3303753.3303757"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13389-015-0110-5"},{"key":"e_1_3_2_1_45_1","unstructured":"Linux. 2021. Perf Wiki. https:\/\/perf.wiki.kernel.org\/."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341302.3342076"},{"key":"e_1_3_2_1_47_1","volume-title":"Garnett (Eds.)","volume":"31","author":"Mitzenmacher Michael","year":"2018","unstructured":"Michael Mitzenmacher. 2018. A Model for Learned Bloom Filters and Optimizing by Sandwiching. In Advances in Neural Information Processing Systems, S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, and R. Garnett (Eds.), Vol. 31. Curran Associates, Inc."},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the Annual ACM-SIAM Symposium on Discrete Algorithms, 746--755","author":"Mitzenmacher Michael","year":"2008","unstructured":"Michael Mitzenmacher and Salil Vadhan. 2008. Why simple hash functions work: Exploiting the entropy in a data stream. Proceedings of the Annual ACM-SIAM Symposium on Discrete Algorithms, 746--755."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics"},{"key":"e_1_3_2_1_51_1","volume-title":"An analysis of approximations for maximizing submodular set functions-I. Mathematical programming 14, 1","author":"Nemhauser George L","year":"1978","unstructured":"George L Nemhauser, Laurence A Wolsey, and Marshall L Fisher. 1978. An analysis of approximations for maximizing submodular set functions-I. Mathematical programming 14, 1 (1978), 265--294."},{"key":"e_1_3_2_1_52_1","volume-title":"Large-Scale Image Retrieval with Attentive Deep Local Features. International Conference on Computer Vision (ICCV)","author":"Noh Hyeonwoo","year":"2016","unstructured":"Hyeonwoo Noh, Andre Araujo, Jack Sim, and Bohyung Han. 2016. Large-Scale Image Retrieval with Attentive Deep Local Features. International Conference on Computer Vision (ICCV) (2016). http:\/\/arxiv.org\/abs\/1612.06321"},{"key":"e_1_3_2_1_53_1","volume-title":"Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques, APPROX\/RANDOM","author":"Obremski Maciej","year":"2017","unstructured":"Maciej Obremski and Maciej Skorski. 2017. Renyi Entropy Estimation Revisited. In Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques, APPROX\/RANDOM 2017, August 16--18, 2017, Berkeley, CA, USA (LIPIcs, Vol. 81), Klaus Jansen, Jos\u00e9 D. P. Rolim, David Williamson, and Santosh S. Vempala (Eds.). Schloss Dagstuhl - Leibniz-Zentrum f\u00fcr Informatik, 20:1--20:15."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1007\/s002360050048"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1137\/110827831"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jalgor.2003.12.002"},{"volume-title":"Automata, Languages and Programming","author":"Mikkel Thorup Mihai P","key":"e_1_3_2_1_57_1","unstructured":"Mihai P atracu and Mikkel Thorup. 2010. On the k-Independence Required by Linear Probing and Minwise Independence. In Automata, Languages and Programming. Springer Berlin Heidelberg, Berlin, Heidelberg, 715--726."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/1993636.1993638"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.12.0130"},{"key":"e_1_3_2_1_60_1","unstructured":"Geoff Pike and Jyrki Alakuijala. 2011. CityHash. https:\/\/github.com\/google\/cityhash."},{"key":"e_1_3_2_1_61_1","unstructured":"Geoff Pike and Jyrki Alakuijala. 2014. FarmHash. https:\/\/github.com\/google\/farmhash."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2610522"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/50202.50223"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/67933.67941"},{"volume-title":"Database Management Systems (3 ed.)","author":"Ramakrishnan Raghu","key":"e_1_3_2_1_65_1","unstructured":"Raghu Ramakrishnan and Johannes Gehrke. 2002. Database Management Systems (3 ed.). McGraw-Hill, Inc., USA."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.14778\/2850583.2850585"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"crossref","unstructured":"R. Rivest. 1992. RFC1321: The MD5 Message-Digest Algorithm.","DOI":"10.17487\/rfc1321"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2007.368997"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.14778\/3380750.3380755"},{"key":"e_1_3_2_1_70_1","unstructured":"Oracle ZFS Steve Tunstall. 2017. DeDupe 2.0. https:\/\/blogs.oracle.com\/wonders-of-zfs-storage\/dedupe-20-v2. Accessed: 2021-05--23."},{"key":"e_1_3_2_1_71_1","unstructured":"Reini Urban. [n.d.]. SMHasher - Reini Urban Fork. https:\/\/github.com\/rurban\/smhasher. Accessed: 2021-05--23."},{"key":"e_1_3_2_1_72_1","volume-title":"Partitioned Learned Bloom Filters. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=6BRLOfrMhW","author":"Vaidya Kapil","year":"2021","unstructured":"Kapil Vaidya, Eric Knorr, Michael Mitzenmacher, and Tim Kraska. 2021. Partitioned Learned Bloom Filters. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=6BRLOfrMhW"},{"key":"e_1_3_2_1_73_1","volume-title":"Daniel Lemire, and Li Jin.","author":"Wang Yi","year":"2020","unstructured":"Yi Wang, Diego Barrios Romero, Daniel Lemire, and Li Jin. 2020. Modern Non- Cryptographic Hash Function and Pseudorandom Generator. (2020)."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.5555\/2033408.2033427"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1016\/0022-0000(81)90033--7"},{"key":"e_1_3_2_1_76_1","unstructured":"Oracle ZFS. 2019. ZFS Deduplication. https:\/\/blogs.oracle.com\/bonwick\/zfs-deduplication-v2. Accessed: 2021-05--23."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.3390\/app10061915"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3465998.3466002"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.3233\/ICG-1990-13203"}],"event":{"name":"SIGMOD\/PODS '22: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Philadelphia PA USA","acronym":"SIGMOD\/PODS '22"},"container-title":["Proceedings of the 2022 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3517894","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3514221.3517894","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:30:36Z","timestamp":1750188636000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3517894"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,10]]},"references-count":77,"alternative-id":["10.1145\/3514221.3517894","10.1145\/3514221"],"URL":"https:\/\/doi.org\/10.1145\/3514221.3517894","relation":{},"subject":[],"published":{"date-parts":[[2022,6,10]]},"assertion":[{"value":"2022-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}