{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T19:04:48Z","timestamp":1778267088511,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the National Natural Science Foundation of China","award":["62172082, 62072084, 62072086"],"award-info":[{"award-number":["62172082, 62072084, 62072086"]}]},{"name":"the National Key Research and Development Program of China","award":["2023YFB4503600"],"award-info":[{"award-number":["2023YFB4503600"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679843","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:11Z","timestamp":1729452851000},"page":"508-518","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Enhancing Deep Entity Resolution with Integrated Blocker-Matcher Training: Balancing Consensus and Discrepancy"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-5823-1965","authenticated-orcid":false,"given":"Wenzhou","family":"Dou","sequence":"first","affiliation":[{"name":"Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0310-6372","authenticated-orcid":false,"given":"Derong","family":"Shen","sequence":"additional","affiliation":[{"name":"Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1302-818X","authenticated-orcid":false,"given":"Xiangmin","family":"Zhou","sequence":"additional","affiliation":[{"name":"RMIT University, Melbourne, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9582-6425","authenticated-orcid":false,"given":"Hui","family":"Bai","sequence":"additional","affiliation":[{"name":"Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5307-4893","authenticated-orcid":false,"given":"Yue","family":"Kou","sequence":"additional","affiliation":[{"name":"Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0166-1324","authenticated-orcid":false,"given":"Tiezheng","family":"Nie","sequence":"additional","affiliation":[{"name":"Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0987-3743","authenticated-orcid":false,"given":"Hang","family":"Cui","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3171-8889","authenticated-orcid":false,"given":"Ge","family":"Yu","sequence":"additional","affiliation":[{"name":"Northeastern University, Shenyang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/1105926.1106227"},{"key":"e_1_3_2_1_2_1","volume-title":"SC-Block: Supervised Contrastive Blocking within Entity Resolution Pipelines. arXiv preprint arXiv:2303.03132","author":"Brinkmann Alexander","year":"2023","unstructured":"Alexander Brinkmann, Roee Shraga, and Christian Bizer. 2023. SC-Block: Supervised Contrastive Blocking within Entity Resolution Pipelines. arXiv preprint arXiv:2303.03132 (2023)."},{"key":"e_1_3_2_1_3_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_4_1","volume-title":"Seed: Simple, efficient, and effective data management via large language models. arXiv preprint arXiv:2310.00749","author":"Hen Zui","year":"2023","unstructured":"Zui CHen, Lei Cao, Sam Madden, Ju Fan, Nan Tang, Zihui Gu, Zeyuan Shang, Chunwei Liu, Michael Cafarella, and Tim Kraska. 2023. Seed: Simple, efficient, and effective data management via large language models. arXiv preprint arXiv:2310.00749 (2023)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035960"},{"key":"e_1_3_2_1_6_1","volume-title":"QLoRA: Efficient Finetuning of Quantized LLMs. arXiv preprint arXiv:2305.14314","author":"Dettmers Tim","year":"2023","unstructured":"Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, and Luke Zettlemoyer. 2023. QLoRA: Efficient Finetuning of Quantized LLMs. arXiv preprint arXiv:2305.14314 (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). 4171--4186."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-00129-1_4"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i4.25544"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.14778\/3236187.3236198"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE60146.2024.00284"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.14778\/1687627.1687674"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1969.10501049"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 3665--3671","author":"Fu Cheng","year":"2021","unstructured":"Cheng Fu, Xianpei Han, Jiaming He, and Le Sun. 2021. Hierarchical matching network for heterogeneous entity resolution. In Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence. 3665--3671."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3134806"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2588576"},{"key":"e_1_3_2_1_17_1","first-page":"491","article-title":"Approximate string joins in a database (almost) for free","volume":"1","author":"Gravano Luis","year":"2001","unstructured":"Luis Gravano, Panagiotis G Ipeirotis, Hosagrahar Visvesvaraya Jagadish, Nick Koudas, Shanmugauelayut Muthukrishnan, Divesh Srivastava, et al. 2001. Approximate string joins in a database (almost) for free. In VLDB, Vol. 1. 491--500.","journal-title":"VLDB"},{"key":"e_1_3_2_1_18_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"He Pengcheng","year":"2023","unstructured":"Pengcheng He, Jianfeng Gao, and Weizhu Chen. 2023. DeBERTaV3: improving DeBERTa using ELECTRA-style pre-training with gradient-disentangled embedding sharing. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/568271.223807"},{"key":"e_1_3_2_1_20_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE48307.2020.00011"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Arjit Jain Sunita Sarawagi and Prithviraj Sen. 2022. Deep Indexed Active Learning for Matching Heterogeneous Entity Representations. In VLDB.","DOI":"10.14778\/3485450.3485455"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICWR.2019.8765267"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1586"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/3007263.3007314"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.14778\/1920841.1920904"},{"key":"e_1_3_2_1_27_1","volume-title":"Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:204960716","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdel rahman Mohamed, Omer Levy, Veselin Stoyanov, and Luke Zettlemoyer. 2019. BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. In Annual Meeting of the Association for Computational Linguistics. https:\/\/api.semanticscholar.org\/CorpusID:204960716"},{"key":"e_1_3_2_1_28_1","volume-title":"Deep entity matching with pre-trained language models. arXiv preprint arXiv:2004.00584","author":"Li Yuliang","year":"2020","unstructured":"Yuliang Li, Jinfeng Li, Yoshihiko Suhara, AnHai Doan, and Wang-Chiew Tan. 2020. Deep entity matching with pre-trained language models. arXiv preprint arXiv:2004.00584 (2020)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00197"},{"key":"e_1_3_2_1_30_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3196926"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3584014.3584017"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 32nd ACM International Conference on Information and Knowledge Management. 4215--4219","author":"Ali Naeim","year":"2023","unstructured":"Ali Naeim abadi, Mir Tafseer Nayeem, and Davood Rafiei. 2023. Product Entity Matching via Tabular Data. In Proceedings of the 32nd ACM International Conference on Information and Knowledge Management. 4215--4219."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.14778\/3574245.3574258"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358018"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/1935826.1935903"},{"key":"e_1_3_2_1_37_1","volume-title":"Supervised Contrastive Learning for Product Matching. In Companion Proceedings of the Web Conference","author":"Peeters Ralph","year":"2022","unstructured":"Ralph Peeters and Christian Bizer. 2022. Supervised Contrastive Learning for Product Matching. In Companion Proceedings of the Web Conference 2022. 248--251."},{"key":"e_1_3_2_1_38_1","volume-title":"Entity matching using large language models. arXiv preprint arXiv:2310.11244","author":"Peeters Ralph","year":"2023","unstructured":"Ralph Peeters and Christian Bizer. 2023. Entity matching using large language models. arXiv preprint arXiv:2310.11244 (2023)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_1_41_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.14778\/3149193.3149199"},{"key":"e_1_3_2_1_43_1","volume-title":"How does prompt engineering affect ChatGPT performance on unsupervised entity resolution? ArXiv","author":"Sisaengsuwanchai Khanin","year":"2023","unstructured":"Khanin Sisaengsuwanchai, Navapat Nananukul, and Mayank Kejriwal. 2023. How does prompt engineering affect ChatGPT performance on unsupervised entity resolution? ArXiv, Vol. abs\/2310.06174 (2023). https:\/\/api.semanticscholar.org\/CorpusID:263829241"},{"key":"e_1_3_2_1_44_1","volume-title":"Juliette Love, et al.","author":"Team Gemma","year":"2024","unstructured":"Gemma Team, Thomas Mesnard, Cassidy Hardin, Robert Dadashi, Surya Bhupatiraju, Shreya Pathak, Laurent Sifre, Morgane Rivi\u00e8re, Mihir Sanjay Kale, Juliette Love, et al. 2024. Gemma: Open models based on gemini research and technology. arXiv preprint arXiv:2403.08295 (2024)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476294"},{"key":"e_1_3_2_1_46_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.14778\/2732977.2732982"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.14778\/2350229.2350263"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.14778\/3565816.3565836"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE55515.2023.00391"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3069908"},{"key":"e_1_3_2_1_52_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=gEZrGCozdqR","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Maarten Bosma, Vincent Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M. Dai, and Quoc V Le. 2022. Finetuned Language Models are Zero-Shot Learners. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=gEZrGCozdqR"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.14778\/3632093.3632096"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517872"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00396"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380017"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371813"},{"key":"e_1_3_2_1_58_1","volume-title":"Proceedings of International Conference on Learning Representations (ICLR).","author":"Zhou Helong","year":"2021","unstructured":"Helong Zhou and Liangchen Song. 2021. Rethinking Soft Labels for Knowledge Distillation: A Bias--Variance Tradeoff Perspective. In Proceedings of International Conference on Learning Representations (ICLR)."}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679843","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679843","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:08Z","timestamp":1750294688000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679843"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":58,"alternative-id":["10.1145\/3627673.3679843","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679843","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}