{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T22:32:01Z","timestamp":1765233121368,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,6,9]],"date-time":"2021-06-09T00:00:00Z","timestamp":1623196800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,6,9]]},"DOI":"10.1145\/3448016.3452824","type":"proceedings-article","created":{"date-parts":[[2021,6,18]],"date-time":"2021-06-18T17:22:39Z","timestamp":1624036959000},"page":"1064-1076","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":25,"title":["Auto-FuzzyJoin"],"prefix":"10.1145","author":[{"given":"Peng","family":"Li","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Xiang","family":"Cheng","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Xu","family":"Chu","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Yeye","family":"He","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA, USA"}]},{"given":"Surajit","family":"Chaudhuri","sequence":"additional","affiliation":[{"name":"Microsoft Research, Redmond, WA, USA"}]}],"member":"320","published-online":{"date-parts":[[2021,6,18]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"[n.d.]. Alteryx: Fuzzy Match Documentation. https:\/\/help.alteryx.com\/2018.2\/ FuzzyMatch.htm.  [n.d.]. Alteryx: Fuzzy Match Documentation. https:\/\/help.alteryx.com\/2018.2\/ FuzzyMatch.htm."},{"key":"e_1_3_2_2_2_1","unstructured":"[n.d.]. Benchmark datasets for entity resolution. https:\/\/dbs.unileipzig. de\/en\/research\/projects\/object_matching\/fever\/benchmark_datasets_ for_entity_resolution.  [n.d.]. Benchmark datasets for entity resolution. https:\/\/dbs.unileipzig. de\/en\/research\/projects\/object_matching\/fever\/benchmark_datasets_ for_entity_resolution."},{"key":"e_1_3_2_2_3_1","unstructured":"[n.d.]. Excel: Fuzzy Lookup Add-In. https:\/\/www.microsoft.com\/en-us\/download\/ details.aspx?id=15011. ([n. d.]).  [n.d.]. Excel: Fuzzy Lookup Add-In. https:\/\/www.microsoft.com\/en-us\/download\/ details.aspx?id=15011. ([n. d.])."},{"key":"e_1_3_2_2_4_1","unstructured":"[n.d.]. Fuzzy Lookup in SQL Server. https:\/\/docs.microsoft.com\/en-us\/sql\/ integration-services\/data-flow\/transformations\/fuzzy-lookup-transformation.  [n.d.]. Fuzzy Lookup in SQL Server. https:\/\/docs.microsoft.com\/en-us\/sql\/ integration-services\/data-flow\/transformations\/fuzzy-lookup-transformation."},{"key":"e_1_3_2_2_5_1","unstructured":"[n.d.]. OpenRefine Fuzzy Reconciliation. https:\/\/github.com\/OpenRefine\/ OpenRefine\/wiki\/Reconciliation.  [n.d.]. OpenRefine Fuzzy Reconciliation. https:\/\/github.com\/OpenRefine\/ OpenRefine\/wiki\/Reconciliation."},{"key":"e_1_3_2_2_6_1","unstructured":"[n.d.]. Python string match library: py_stringmatching. http:\/\/anhaidgroup. github.io\/py_stringmatching\/v0.4.1\/Tutorial.html.  [n.d.]. Python string match library: py_stringmatching. http:\/\/anhaidgroup. github.io\/py_stringmatching\/v0.4.1\/Tutorial.html."},{"key":"e_1_3_2_2_7_1","unstructured":"2019.7.12. Duplicate Detection Record Linkage and Identity Uncertainty: Datasets. http:\/\/www.cs.utexas.edu\/users\/ml\/riddle\/data.html.  2019.7.12. Duplicate Detection Record Linkage and Identity Uncertainty: Datasets. http:\/\/www.cs.utexas.edu\/users\/ml\/riddle\/data.html."},{"key":"e_1_3_2_2_8_1","unstructured":"2019.7.12. Fuzzy Join in Power Query. https:\/\/support.microsoft.com\/enus\/ office\/fuzzy-match-support-for-get-transform-power-query-ffdd5082- c0c8--4c8e-a794-bd3962b90649.  2019.7.12. Fuzzy Join in Power Query. https:\/\/support.microsoft.com\/enus\/ office\/fuzzy-match-support-for-get-transform-power-query-ffdd5082- c0c8--4c8e-a794-bd3962b90649."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1132516.1132557"},{"key":"e_1_3_2_2_10_1","volume-title":"David Menestrina, Aditya G. Parameswaran, and Jeffrey D. Ullman.","author":"Afrati Foto N.","year":"2012","unstructured":"Foto N. Afrati , Anish Das Sarma , David Menestrina, Aditya G. Parameswaran, and Jeffrey D. Ullman. 2012 . Fuzzy Joins Using MapReduce. In Proceedings of ICDE. Foto N. Afrati, Anish Das Sarma, David Menestrina, Aditya G. Parameswaran, and Jeffrey D. Ullman. 2012. Fuzzy Joins Using MapReduce. In Proceedings of ICDE."},{"key":"e_1_3_2_2_11_1","volume-title":"Proceedings of VLDB.","author":"Arasu Arvind","year":"2006","unstructured":"Arvind Arasu , Venkatesh Ganti , and Raghav Kaushik . 2006 . Efficient Exact Set-Similarity Joins . In Proceedings of VLDB. Arvind Arasu, Venkatesh Ganti, and Raghav Kaushik. 2006. Efficient Exact Set-Similarity Joins. In Proceedings of VLDB."},{"key":"e_1_3_2_2_12_1","unstructured":"Ricardo Baeza-Yates Berthier Ribeiro-Neto etal 1999. Modern information retrieval. Vol. 463. ACM press New York.  Ricardo Baeza-Yates Berthier Ribeiro-Neto et al. 1999. Modern information retrieval. Vol. 463. ACM press New York."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1242572.1242591"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2006.13"},{"key":"e_1_3_2_2_15_1","unstructured":"Christian Bizer. 2014. Search Joins with the Web.. In ICDT. 3.  Christian Bizer. 2014. Search Joins with the Web.. In ICDT. 3."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2017.11.077"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/872757.872796"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2006.9"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.14778\/2983200.2983203"},{"key":"e_1_3_2_2_20_1","volume-title":"modAL: A modular active learning framework for Python. arXiv preprint arXiv:1805.00979","author":"Danka Tivadar","year":"2018","unstructured":"Tivadar Danka and Peter Horvath . 2018. modAL: A modular active learning framework for Python. arXiv preprint arXiv:1805.00979 ( 2018 ). Tivadar Danka and Peter Horvath. 2018. modAL: A modular active learning framework for Python. arXiv preprint arXiv:1805.00979 (2018)."},{"key":"e_1_3_2_2_21_1","unstructured":"Sanjib Das AnHai Doan Paul Suganthan G. C. Chaitanya Gokhale Pradap Konda Yash Govind and Derek Paulsen. 2019.07.12. The Magellan Data Repository. https:\/\/sites.google.com\/site\/anhaidgroup\/projects\/data.  Sanjib Das AnHai Doan Paul Suganthan G. C. Chaitanya Gokhale Pradap Konda Yash Govind and Derek Paulsen. 2019.07.12. The Magellan Data Repository. https:\/\/sites.google.com\/site\/anhaidgroup\/projects\/data."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.14778\/2732977.2732981"},{"key":"e_1_3_2_2_23_1","unstructured":"Jonathan De Bruin. 2015. Probabilistic record linkage with the Fellegi and Sunter framework: Using probabilistic record linkage to link privacy preserved police and hospital road accident records. (2015).  Jonathan De Bruin. 2015. Probabilistic record linkage with the Fellegi and Sunter framework: Using probabilistic record linkage to link privacy preserved police and hospital road accident records. (2015)."},{"key":"#cr-split#-e_1_3_2_2_24_1.1","unstructured":"J De Bruin. 2019. Python Record Linkage Toolkit: A toolkit for record linkage and duplicate detection in Python. https:\/\/doi.org\/10.5281\/zenodo.3559043 10.5281\/zenodo.3559043"},{"key":"#cr-split#-e_1_3_2_2_24_1.2","unstructured":"J De Bruin. 2019. Python Record Linkage Toolkit: A toolkit for record linkage and duplicate detection in Python. https:\/\/doi.org\/10.5281\/zenodo.3559043"},{"key":"e_1_3_2_2_25_1","volume-title":"Proceedings of ICDE.","author":"Deng Dong","year":"2013","unstructured":"Dong Deng , Guoliang Li , Shuang Hao , Jiannan Wang , and Jianhua Feng . 2013 . MassJoin: A MapReduce-based Algorithm for String Similarity Joins . In Proceedings of ICDE. Dong Deng, Guoliang Li, Shuang Hao, Jiannan Wang, and Jianhua Feng. 2013. MassJoin: A MapReduce-based Algorithm for String Similarity Joins. In Proceedings of ICDE."},{"volume-title":"Principles of data integration","author":"Doan AnHai","key":"e_1_3_2_2_26_1","unstructured":"AnHai Doan , Alon Halevy , and Zachary Ives . 2012. Principles of data integration . Elsevier . AnHai Doan, Alon Halevy, and Zachary Ives. 2012. Principles of data integration. Elsevier."},{"key":"e_1_3_2_2_27_1","first-page":"1","article-title":"Duplicate Record Detection: A Survey","volume":"19","author":"Elmagarmid Ahmed K","year":"2007","unstructured":"Ahmed K Elmagarmid , Panagiotis G Ipeirotis , and Vassilios S Verykios . 2007 . Duplicate Record Detection: A Survey . IEEETKDE 19 , 1 (2007), 1 -- 16 . Ahmed K Elmagarmid, Panagiotis G Ipeirotis, and Vassilios S Verykios. 2007. Duplicate Record Detection: A Survey. IEEETKDE 19, 1 (2007), 1--16.","journal-title":"IEEETKDE"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1969.10501049"},{"key":"e_1_3_2_2_29_1","volume-title":"Proceedings of the International Workshop on Bioinformatics Research and Applications (IWBRA). Citeseer, 1--12","author":"Hajiaghayi MT","year":"2006","unstructured":"MT Hajiaghayi , K Jain , K Konwar , LC Lau , II Mandoiu , A Russell , A Shvartsman , and VV Vazirani . 2006 . The minimum k-colored subgraph problem in haplotyping and DNA primer selection . In Proceedings of the International Workshop on Bioinformatics Research and Applications (IWBRA). Citeseer, 1--12 . MT Hajiaghayi, K Jain, K Konwar, LC Lau, II Mandoiu, A Russell, A Shvartsman, and VV Vazirani. 2006. The minimum k-colored subgraph problem in haplotyping and DNA primer selection. In Proceedings of the International Workshop on Bioinformatics Research and Applications (IWBRA). Citeseer, 1--12."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.14778\/2824032.2824036"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.14778\/2994509.2994535"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920904"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.3233\/SW-140134"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.websem.2015.05.001"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.14778\/2078331.2078340"},{"key":"e_1_3_2_2_36_1","unstructured":"Peng Li Xiang Cheng Xu Chu Yeye He and Surajit Chaudhuri. 2021. Auto- FuzzyJoin: Auto-Program Fuzzy Similarity Joins Without Labeled Examples. arXiv:arXiv:2103.04489  Peng Li Xiang Cheng Xu Chu Yeye He and Surajit Chaudhuri. 2021. Auto- FuzzyJoin: Auto-Program Fuzzy Similarity Joins Without Labeled Examples. arXiv:arXiv:2103.04489"},{"key":"e_1_3_2_2_37_1","volume-title":"VLDB 2021","author":"Li Yuliang","year":"2021","unstructured":"Yuliang Li , Jinfeng Li , Yoshihiko Suhara , AnHai Doan , and Wang-Chiew Tan . 2021 . Deep entity matching with pre-trained language models . VLDB 2021 (2021). Yuliang Li, Jinfeng Li, Yoshihiko Suhara, AnHai Doan, and Wang-Chiew Tan. 2021. Deep entity matching with pre-trained language models. VLDB 2021 (2021)."},{"volume-title":"Text information retrieval systems","author":"Meadow Charles T","key":"e_1_3_2_2_38_1","unstructured":"Charles T Meadow , Donald H Kraft , and Bert R Boyce . 1999. Text information retrieval systems . Academic Press, Inc. Charles T Meadow, Donald H Kraft, and Bert R Boyce. 1999. Text information retrieval systems. Academic Press, Inc."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.14778\/2212351.2212353"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196926"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.14778\/2947618.2947624"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2010.5447873"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2213836.2213935"},{"key":"e_1_3_2_2_44_1","volume-title":"Parallel set similarity join on big data based on locality-sensitive hashing. Science of computer programming 145","author":"Sohrabi Mohammad Karim","year":"2017","unstructured":"Mohammad Karim Sohrabi and Hosseion Azgomi . 2017. Parallel set similarity join on big data based on locality-sensitive hashing. Science of computer programming 145 ( 2017 ), 1--12. Mohammad Karim Sohrabi and Hosseion Azgomi. 2017. Parallel set similarity join on big data based on locality-sensitive hashing. Science of computer programming 145 (2017), 1--12."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/1807167.1807222"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2213836.2213847"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.5555\/1182635.1164157"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389743"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000824.2000825"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-015-5900-5"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313578"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.14778\/3115404.3115409"}],"event":{"name":"SIGMOD\/PODS '21: International Conference on Management of Data","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Virtual Event China","acronym":"SIGMOD\/PODS '21"},"container-title":["Proceedings of the 2021 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3448016.3452824","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3448016.3452824","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:28:05Z","timestamp":1750195685000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3448016.3452824"}},"subtitle":["Auto-Program Fuzzy Similarity Joins Without Labeled Examples"],"short-title":[],"issued":{"date-parts":[[2021,6,9]]},"references-count":53,"alternative-id":["10.1145\/3448016.3452824","10.1145\/3448016"],"URL":"https:\/\/doi.org\/10.1145\/3448016.3452824","relation":{},"subject":[],"published":{"date-parts":[[2021,6,9]]},"assertion":[{"value":"2021-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}