{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T20:22:41Z","timestamp":1776284561881,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":74,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"ERC","award":["652976"],"award-info":[{"award-number":["652976"]}]},{"name":"NSFC","award":["61902274"],"award-info":[{"award-number":["61902274"]}]},{"name":"Science And Technology Innovation Bureau Of Longhua District, Shenzhen","award":["LHKJCXJCYJ202003"],"award-info":[{"award-number":["LHKJCXJCYJ202003"]}]},{"name":"Royal Society Wolfson Research Merit Award","award":["WRM\/R1\/180014"],"award-info":[{"award-number":["WRM\/R1\/180014"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,10]]},"DOI":"10.1145\/3514221.3526165","type":"proceedings-article","created":{"date-parts":[[2022,6,12]],"date-time":"2022-06-12T02:33:49Z","timestamp":1655001229000},"page":"384-398","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":21,"title":["Parallel Rule Discovery from Large Datasets by Sampling"],"prefix":"10.1145","author":[{"given":"Wenfei","family":"Fan","sequence":"first","affiliation":[{"name":"Shenzhen Institute of Computing Sciences, University of Edinburgh, &amp; Beihang University, Shenzhen, China"}]},{"given":"Ziyan","family":"Han","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"given":"Yaoshu","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Computing Sciences, Shenzhen, China"}]},{"given":"Min","family":"Xie","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Computing Sciences, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2022,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2661829.2661884"},{"key":"e_1_3_2_1_2_1","volume-title":"Foundations of Databases","author":"Abiteboul Serge","unstructured":"Serge Abiteboul, Richard Hull, and Victor Vianu. 1995. Foundations of Databases .Addison-Wesley."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Marcelo Arenas Leopoldo Bertossi and Jan Chomicki. 1999. Consistent Query Answers in Inconsistent Databases. In PODS.","DOI":"10.1145\/303976.303983"},{"key":"e_1_3_2_1_4_1","volume-title":"Collective entity resolution in relational data. TKDD","author":"Bhattacharya Indrajit","year":"2007","unstructured":"Indrajit Bhattacharya and Lise Getoor. 2007. Collective entity resolution in relational data. TKDD (2007)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.14778\/3157794.3157800"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Venkatesan T. Chakaravarthy Vinayaka Pandit and Yogish Sabharwal. 2009. Analysis of sampling techniques for association rule mining. In ICDT. 276--283.","DOI":"10.1145\/1514894.1514927"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2010.07.008"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Bin Chen Peter J. Haas and Peter Scheuermann. 2002. A new two-phase sampling based algorithm for discovering association rules. In SIGKDD.","DOI":"10.1145\/775107.775114"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2011.04.018"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3418896"},{"key":"e_1_3_2_1_11_1","volume-title":"Discovering Denial Constraints. PVLDB","author":"Chu Xu","year":"2013","unstructured":"Xu Chu, Ihab F. Ilyas, and Paolo Papotti. 2013. Discovering Denial Constraints. PVLDB (2013)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Kun-Ta Chuang Ming-Syan Chen and Wen-Chieh Yang. 2005. Progressive Sampling for Association Rules Based on Sampling Error Estimation. In PAKDD.","DOI":"10.1007\/11430919_59"},{"key":"e_1_3_2_1_13_1","volume-title":"Relational Completeness of Data Base Sublanguages. Database Systems: 65--98, Prentice Hall and IBM Research Report RJ 987","author":"Codd E. F.","year":"1972","unstructured":"E. F. Codd. 1972. Relational Completeness of Data Base Sublanguages. Database Systems: 65--98, Prentice Hall and IBM Research Report RJ 987 (1972)."},{"key":"e_1_3_2_1_14_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT. 4171--4186.","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT. 4171--4186."},{"key":"e_1_3_2_1_15_1","unstructured":"Wenfei Fan Wenzhi Fu Ruochun Jin Ling Lu and Chao Tian. 2022. Discovering Association Rules from Big Graphs. In VLDB."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-010-0206-6"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1366102.1366103"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2010.154"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Wenfei Fan Xueli Liu and Yingjie Cao. 2018. Parallel Reasoning of Graph Functional Dependencies. In ICDE. 593--604.","DOI":"10.1109\/ICDE.2018.00060"},{"key":"e_1_3_2_1_20_1","volume-title":"Unifying Logic Rules and Machine Learning for Entity Enhancing. Science China Information Sciences","author":"Fan Wenfei","year":"2020","unstructured":"Wenfei Fan, Ping Lu, and Chao Tian. 2020. Unifying Logic Rules and Machine Learning for Entity Enhancing. Science China Information Sciences (2020)."},{"key":"e_1_3_2_1_21_1","volume-title":"Discrepancy Detection and Incremental Detection. PVLDB","author":"Fan Wenfei","year":"2021","unstructured":"Wenfei Fan, Chao Tian, Yanghao Wang, and Qiang Yin. 2021. Discrepancy Detection and Incremental Detection. PVLDB (2021)."},{"key":"e_1_3_2_1_22_1","volume-title":"Database dependency discovery: A machine learning approach. AI communications","author":"Flach Peter A","year":"1999","unstructured":"Peter A Flach and Iztok Savnik. 1999. Database dependency discovery: A machine learning approach. AI communications, Vol. 12, 3 (1999), 139--160."},{"key":"e_1_3_2_1_23_1","volume-title":"Parallel mining of dependencies","author":"Garnaud Eve","unstructured":"Eve Garnaud, Nicolas Hanusse, Sofian Maabout, and No\u00ebl Novelli. 2014. Parallel mining of dependencies. In HPCS. IEEE, 491--498."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.14778\/3364324.3364332"},{"key":"e_1_3_2_1_25_1","volume-title":"On generating near-optimal tableaux for conditional functional dependencies. VLDB","author":"Golab Lukasz","year":"2008","unstructured":"Lukasz Golab, Howard Karloff, Flip Korn, Divesh Srivastava, and Bei Yu. 2008. On generating near-optimal tableaux for conditional functional dependencies. VLDB (2008)."},{"key":"e_1_3_2_1_26_1","unstructured":"Han He. 2020. HanLP: Han Language Processing. https:\/\/github.com\/hankcs\/HanLP"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Alireza Heidari Joshua McGrath Ihab F. Ilyas and Theodoros Rekatsinas. 2019. HoloDetect: Few-Shot Learning for Error Detection. In SIGMOD.","DOI":"10.1145\/3299869.3319888"},{"key":"e_1_3_2_1_28_1","unstructured":"Xuegang Hu and Haitao Yu. 2006. The Research of Sampling for Mining Frequent Itemsets. In Rough Sets and Knowledge Technology (RSKT)."},{"key":"e_1_3_2_1_29_1","volume-title":"TANE: An Efficient Algorithm for Discovering Functional and Approximate Dependencies. Comput. J.","author":"Juha","year":"1999","unstructured":"Yk\"a Huhtala, Juha K\"a rkk\"a inen, Pasi Porkka, and Hannu Toivonen. 1999. TANE: An Efficient Algorithm for Discovering Functional and Approximate Dependencies. Comput. J. (1999)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Wontae Hwang and Dongseung Kim. 2006. Improved Association Rule Mining by Modified Trimming. In Computer and Information Technology (CIT).","DOI":"10.1109\/CIT.2006.101"},{"key":"e_1_3_2_1_31_1","unstructured":"Caiyan Jia and Ruqian Lu. 2005. Sampling Ensembles for Frequent Patterns. In Fuzzy Systems and Knowledge Discovery (FSKD)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920904"},{"key":"e_1_3_2_1_33_1","first-page":"712","article-title":"MDedup: Duplicate detection with matching dependencies","volume":"13","author":"Papenbrock Thorsten","year":"2020","unstructured":"loannis Koumarelas, Thorsten Papenbrock, and Felix Naumann. 2020. MDedup: Duplicate detection with matching dependencies. PVLDB, Vol. 13, 5 (2020), 712--725.","journal-title":"PVLDB"},{"key":"e_1_3_2_1_34_1","volume-title":"Efficient discovery of approximate dependencies. VLDB","author":"Kruse Sebastian","year":"2018","unstructured":"Sebastian Kruse and Felix Naumann. 2018. Efficient discovery of approximate dependencies. VLDB (2018)."},{"key":"e_1_3_2_1_35_1","volume-title":"A complexity theory of efficient parallel algorithms. TCS","author":"Kruskal Clyde P","year":"1990","unstructured":"Clyde P Kruskal, Larry Rudolph, and Marc Snir. 1990. A complexity theory of efficient parallel algorithms. TCS (1990)."},{"key":"e_1_3_2_1_36_1","volume-title":"Improving the Efficiency and Effectiveness for BERT-based Entity Resolution","author":"Li Bing","unstructured":"Bing Li, Yukai Miao, Yaoshu Wang, Yifang Sun, and Wei Wang. 2021. Improving the Efficiency and Effectiveness for BERT-based Entity Resolution. In AAAI. AAAI Press, 13226--13233."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3284551"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Weibang Li Zhanhuai Li Qun Chen Tao Jiang and Hailong Liu. 2015. Discovering functional dependencies in vertically distributed big data. In WISE. 199--207.","DOI":"10.1007\/978-3-319-26187-4_15"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Weibang Li Zhanhuai Li Qun Chen Tao Jiang and Zhilei Yin. 2016. Discovering approximate functional dependencies from distributed big data. In APWeb. 289--301.","DOI":"10.1007\/978-3-319-45817-5_23"},{"key":"e_1_3_2_1_40_1","volume-title":"Gopalan","author":"Li Yanrong","year":"2004","unstructured":"Yanrong Li and Raj P. Gopalan. 2004. Effective Sampling for Mining Association Rules. In Advances in Artificial Intelligence."},{"key":"e_1_3_2_1_41_1","volume-title":"Gopalan","author":"Li Yanrong","year":"2005","unstructured":"Yanrong Li and Raj P. Gopalan. 2005. Stratified Sampling for Association Rules Mining. In Artificial Intelligence Applications and Innovations (IFIP)."},{"key":"e_1_3_2_1_42_1","volume-title":"Deep Entity Matching with Pre-Trained Language Models. arXiv preprint arXiv:2004.00584","author":"Li Yuliang","year":"2020","unstructured":"Yuliang Li, Jinfeng Li, Yoshihiko Suhara, AnHai Doan, and Wang-Chiew Tan. 2020. Deep Entity Matching with Pre-Trained Language Models. arXiv preprint arXiv:2004.00584 (2020)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.14778\/3401960.3401966"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-46439-5_24"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1177\/0165551508100382"},{"key":"e_1_3_2_1_46_1","unstructured":"Heikki Mannila Hannu Toivonen and A. Inkeri Verkamo. 1994. Efficient Algorithms for Discovering Association Rules. In Knowledge Discovery in Databases."},{"key":"e_1_3_2_1_47_1","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A. Rusu Joel Veness Marc G. Bellemare Alex Graves Martin A. Riedmiller Andreas Fidjeland Georg Ostrovski Stig Petersen Charles Beattie Amir Sadik Ioannis Antonoglou Helen King Dharshan Kumaran Daan Wierstra Shane Legg and Demis Hassabis. 2015. Human-level control through deep reinforcement learning. Nat. (2015)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1080\/00107510500052444"},{"key":"e_1_3_2_1_49_1","volume-title":"Fun: An efficient algorithm for mining functional and embedded dependencies","author":"Novelli Noel","year":"2001","unstructured":"Noel Novelli and Rosine Cicchetti. 2001. Fun: An efficient algorithm for mining functional and embedded dependencies. In ICDT. Springer, 189--203."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Thorsten Papenbrock and Felix Naumann. 2016. A Hybrid Approach to Functional Dependency Discovery. In SIGMOD.","DOI":"10.1145\/2882903.2915203"},{"key":"e_1_3_2_1_51_1","volume-title":"Efficient Progressive Sampling for Association Rules. In International Conference on Data Mining (ICDM). 354--361","author":"Parthasarathy Srinivasan","year":"2002","unstructured":"Srinivasan Parthasarathy. 2002. Efficient Progressive Sampling for Association Rules. In International Conference on Data Mining (ICDM). 354--361."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.14778\/3368289.3368293"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.14778\/3377369.3377377"},{"key":"e_1_3_2_1_54_1","volume-title":"Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. CoRR","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. CoRR, Vol. abs\/1908.10084 (2019). arxiv: 1908.10084"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.14778\/3137628.3137631"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Matteo Riondato and Eli Upfal. 2015. Mining Frequent Itemsets through Progressive Sampling with Rademacher Averages. In SIGKDD.","DOI":"10.1145\/2783258.2783265"},{"key":"e_1_3_2_1_57_1","volume-title":"2019 a. Distributed discovery of functional dependencies","author":"Saxena Hemant","unstructured":"Hemant Saxena, Lukasz Golab, and Ihab F Ilyas. 2019 a. Distributed discovery of functional dependencies. In ICDE. IEEE, 1590--1593."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.14778\/3342263.3342638"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3392778"},{"key":"e_1_3_2_1_60_1","unstructured":"Philipp Schirmer Thorsten Papenbrock Sebastian Kruse Felix Naumann Dennis Hempfing Torben Mayer and Daniel Neusch\"afer-Rube. 2019. DynFD: Functional Dependency Discovery in Dynamic Datasets. In EDBT."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"crossref","unstructured":"Michael Sejr Schlichtkrull Thomas N. Kipf Peter Bloem Rianne van den Berg Ivan Titov and Max Welling. 2018. Modeling Relational Data with Graph Convolutional Networks. In ESWC.","DOI":"10.1007\/978-3-319-93417-4_38"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.14778\/3149193.3149199"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"crossref","unstructured":"Shaoxu Song and Lei Chen. 2009. Discovering matching dependencies. In CIKM.","DOI":"10.1145\/1645953.1646135"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/1401890.1402008"},{"key":"e_1_3_2_1_65_1","volume-title":"Pieter Abbeel, and Daphne Koller.","author":"Taskar Benjamin","year":"2003","unstructured":"Benjamin Taskar, Ming Fai Wong, Pieter Abbeel, and Daphne Koller. 2003. Link Prediction in Relational Data. In NIPS. 659--666."},{"key":"e_1_3_2_1_66_1","unstructured":"Hannu Toivonen. 1996. Sampling Large Databases for Association Rules. In VLDB. Morgan Kaufmann 134--145."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/79173.79181"},{"key":"e_1_3_2_1_68_1","volume-title":"Robertson","author":"Wyss Catharine M.","year":"2001","unstructured":"Catharine M. Wyss, Chris Giannella, and Edward L. Robertson. 2001. FastFDs: A Heuristic-Driven, Depth-First Algorithm for Mining Functional Dependencies from Relation Instances - Extended Abstract. In DaWak."},{"key":"e_1_3_2_1_69_1","volume-title":"Liang Jeff Chen, and Zheng Zhang","author":"Yan Ying","year":"2014","unstructured":"Ying Yan, Liang Jeff Chen, and Zheng Zhang. 2014. Error-bounded Sampling for Analytics on Big Sparse Data. PVLDB (2014)."},{"key":"e_1_3_2_1_70_1","volume-title":"Discovering functional dependencies in a database using equivalences","author":"Yao H","unstructured":"H Yao, H Hamilton, and C Butz. 2002. Fd_mine: Discovering functional dependencies in a database using equivalences. In IEEE ICDM. 1--15."},{"key":"e_1_3_2_1_71_1","unstructured":"Juntao Yu Bernd Bohnet and Massimo Poesio. 2020. Named Entity Recognition as Dependency Parsing. In ACL."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1020995206763"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"crossref","unstructured":"Yunjia Zhang Zhihan Guo and Theodoros Rekatsinas. 2020. A Statistical Perspective on Discovering Functional Dependencies in Noisy Data. In SIGMOD. 861--876.","DOI":"10.1145\/3318464.3389749"},{"key":"e_1_3_2_1_74_1","unstructured":"Yanchang Zhao Chengqi Zhang and Shichao Zhang. 2006. Efficient Frequent Itemsets Mining by Sampling. In Advances in Intelligent (IT)."}],"event":{"name":"SIGMOD\/PODS '22: International Conference on Management of Data","location":"Philadelphia PA USA","acronym":"SIGMOD\/PODS '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2022 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3526165","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3514221.3526165","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:10:13Z","timestamp":1750183813000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3526165"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,10]]},"references-count":74,"alternative-id":["10.1145\/3514221.3526165","10.1145\/3514221"],"URL":"https:\/\/doi.org\/10.1145\/3514221.3526165","relation":{},"subject":[],"published":{"date-parts":[[2022,6,10]]},"assertion":[{"value":"2022-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}