{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T18:18:45Z","timestamp":1771957125655,"version":"3.50.1"},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,7,9]],"date-time":"2024-07-09T00:00:00Z","timestamp":1720483200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,7,9]],"date-time":"2024-07-09T00:00:00Z","timestamp":1720483200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100018693","name":"HORIZON EUROPE Framework Programme","doi-asserted-by":"publisher","award":["101070122"],"award-info":[{"award-number":["101070122"]}],"id":[{"id":"10.13039\/100018693","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s00778-024-00868-7","type":"journal-article","created":{"date-parts":[[2024,7,9]],"date-time":"2024-07-09T10:09:57Z","timestamp":1720519797000},"page":"1671-1696","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Open benchmark for filtering techniques in entity resolution"],"prefix":"10.1007","volume":"33","author":[{"given":"Franziska","family":"Neuhof","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marco","family":"Fisichella","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7298-9431","authenticated-orcid":false,"given":"George","family":"Papadakis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Konstantinos","family":"Nikoletos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nikolaus","family":"Augsten","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wolfgang","family":"Nejdl","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Manolis","family":"Koubarakis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,7,9]]},"reference":[{"key":"868_CR1","doi-asserted-by":"crossref","unstructured":"Getoor, L., Machanavajjhala, A.: Entity Resolution: Theory, Practice and Open Challenges. PVLDB (2012)","DOI":"10.1145\/2487575.2506179"},{"key":"868_CR2","doi-asserted-by":"crossref","unstructured":"Dong, X.L., Srivastava, D.: Big Data Integration. Morgan and Claypool Publishers (2015)","DOI":"10.1007\/978-3-031-01853-4"},{"key":"868_CR3","doi-asserted-by":"crossref","unstructured":"Christen, P.: Data Matching. Springer (2012)","DOI":"10.1007\/978-3-642-31164-2"},{"key":"868_CR4","doi-asserted-by":"crossref","unstructured":"Elmagarmid, A., Ipeirotis, P., Verykios, V.: Duplicate record detection: a survey. TKDE 19(1) (2007)","DOI":"10.1109\/TKDE.2007.250581"},{"key":"868_CR5","doi-asserted-by":"crossref","unstructured":"Papadakis, G., Ioannou, E., Thanos,, E. Palpanas, T.: The Four Generations of Entity Resolution. Morgan & Claypool Publishers (2021)","DOI":"10.1007\/978-3-031-01878-7"},{"key":"868_CR6","doi-asserted-by":"crossref","unstructured":"Barlaug, N., Gulla, J.A.: Neural networks for entity matching: a survey. In: ACM TKDD (2021)","DOI":"10.1145\/3442200"},{"key":"868_CR7","doi-asserted-by":"crossref","unstructured":"Hassanzadeh, O., Chiang, F., Miller, R.J., Lee, H.C.: Framework for evaluating clustering algorithms in duplicate detection. PVLDB 2(1) (2009)","DOI":"10.14778\/1687627.1687771"},{"key":"868_CR8","doi-asserted-by":"crossref","unstructured":"Christen, P.: A survey of indexing techniques for scalable record linkage and deduplication. TKDE (2012)","DOI":"10.1109\/TKDE.2011.127"},{"issue":"11","key":"868_CR9","first-page":"2459","volume":"14","author":"S Thirumuruganathan","year":"2021","unstructured":"Thirumuruganathan, S., et al.: Deep learning for blocking in entity matching: a design space exploration. PVLDB 14(11), 2459\u20132472 (2021)","journal-title":"PVLDB"},{"key":"868_CR10","doi-asserted-by":"crossref","unstructured":"Papadakis, G., Svirsky,, J. Gal, A., Palpanas, T.: Comparative analysis of approximate blocking techniques for entity resolution. PVLDB 9(9) (2016)","DOI":"10.14778\/2947618.2947624"},{"issue":"9","key":"868_CR11","doi-asserted-by":"publisher","first-page":"636","DOI":"10.14778\/2947618.2947620","volume":"9","author":"W Mann","year":"2016","unstructured":"Mann, W., Augsten, N., Bouros, P.: An empirical evaluation of set similarity join techniques. Proc. VLDB Endow. 9(9), 636\u2013647 (2016)","journal-title":"Proc. VLDB Endow."},{"issue":"8","key":"868_CR12","doi-asserted-by":"publisher","first-page":"625","DOI":"10.14778\/2732296.2732299","volume":"7","author":"Y Jiang","year":"2014","unstructured":"Jiang, Y., Li, G., Feng, J., Li, W.: String similarity joins: an experimental evaluation. Proc. VLDB Endow. 7(8), 625\u2013636 (2014)","journal-title":"Proc. VLDB Endow."},{"key":"868_CR13","doi-asserted-by":"crossref","unstructured":"Aum\u00fcller, M., Bernhardsson, E., Faithfull, A.J.: Ann-benchmarks: a benchmarking tool for approximate nearest neighbor algorithms. Inf. Syst. 87 (2020)","DOI":"10.1016\/j.is.2019.02.006"},{"issue":"4","key":"868_CR14","doi-asserted-by":"publisher","first-page":"312","DOI":"10.14778\/2856318.2856326","volume":"9","author":"G Papadakis","year":"2015","unstructured":"Papadakis, G., Alexiou, G., Papastefanatos, G., Koutrika, G.: Schema-agnostic vs schema-based configurations for blocking methods on homogeneous data. Proc. VLDB Endow. 9(4), 312\u2013323 (2015)","journal-title":"Proc. VLDB Endow."},{"issue":"10","key":"868_CR15","first-page":"1110","volume":"11","author":"F Fier","year":"2018","unstructured":"Fier, F., Augsten, N., Bouros, P., Leser, U., Freytag, J.: Set similarity joins on mapreduce: an experimental survey. PVLDB 11(10), 1110\u20131122 (2018)","journal-title":"PVLDB"},{"key":"868_CR16","doi-asserted-by":"crossref","unstructured":"Papadakis, G., Fisichella, M., Schoger, F., Mandilaras, G., Augsten, N., Nejdl, W.: Benchmarking filtering techniques for entity resolution. In: ICDE (2023)","DOI":"10.1109\/ICDE55515.2023.00389"},{"key":"868_CR17","doi-asserted-by":"crossref","unstructured":"Vernica, R., Carey, M.J., Li, C.: Efficient parallel set-similarity joins using mapreduce. In: ACM SIGMOD, pp. 495\u2013506 (2010)","DOI":"10.1145\/1807167.1807222"},{"key":"868_CR18","doi-asserted-by":"crossref","unstructured":"Papadakis, G. et\u00a0al.: Three-dimensional entity resolution with jedai. Inf. Syst. 93 (2020)","DOI":"10.1016\/j.is.2020.101565"},{"issue":"4","key":"868_CR19","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1145\/3385658.3385664","volume":"48","author":"G Papadakis","year":"2019","unstructured":"Papadakis, G., Tsekouras, L., Thanos, E., Giannakopoulos, G., Palpanas, T., Koubarakis, M.: Domain- and structure-agnostic end-to-end entity resolution with jedai. SIGMOD Rec. 48(4), 30\u201336 (2019)","journal-title":"SIGMOD Rec."},{"issue":"6","key":"868_CR20","first-page":"1507","volume":"16","author":"D Paulsen","year":"2023","unstructured":"Paulsen, D., Govind, Y., Doan, A.: Sparkly: a simple yet surprisingly strong TF\/IDF blocker for entity matching. PVLDB 16(6), 1507\u20131519 (2023)","journal-title":"PVLDB"},{"issue":"12","key":"868_CR21","doi-asserted-by":"publisher","first-page":"1197","DOI":"10.14778\/2994509.2994535","volume":"9","author":"P Konda","year":"2016","unstructured":"Konda, P., et al.: Magellan: toward building entity matching management systems. Proc. VLDB Endow. 9(12), 1197\u20131208 (2016)","journal-title":"Proc. VLDB Endow."},{"key":"868_CR22","unstructured":"Brunner, U., Stockinger, K.: Entity matching with transformer architectures: a step forward in data integration. In: EDBT, pp. 463\u2013473 (2020)"},{"key":"868_CR23","doi-asserted-by":"crossref","unstructured":"Galhotra, S., Firmani, D., Saha, B., Srivastava, D.: BEER: blocking for effective entity resolution. In: SIGMOD, pp. 2711\u20132715 (2021)","DOI":"10.1145\/3448016.3452747"},{"key":"868_CR24","doi-asserted-by":"crossref","unstructured":"Galhotra, S., Firmani, D., Saha, B., Srivastava, D.: Efficient and effective ER with progressive blocking. VLDB J. 30(4), pp. 537\u2013557 (2021)","DOI":"10.1007\/s00778-021-00656-7"},{"key":"868_CR25","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to Information Retrieval","author":"C Manning","year":"2008","unstructured":"Manning, C., Raghavan, P., Sch\u00fctze, H.: Introduction to Information Retrieval. Cambridge University Press, Cambridge (2008)"},{"key":"868_CR26","doi-asserted-by":"crossref","unstructured":"Nanayakkara, C., Christen, P.: Locality sensitive hashing with temporal and spatial constraints for efficient population record linkage. In: ACM CIKM, pp. 4354\u20134358 (2022)","DOI":"10.1145\/3511808.3557631"},{"issue":"8","key":"868_CR27","first-page":"1946","volume":"26","author":"G Papadakis","year":"2014","unstructured":"Papadakis, G., Koutrika, G., Palpanas, T., Nejdl, W.: Meta-blocking: taking entity resolution to the next level. TKDE 26(8), 1946\u20131960 (2014)","journal-title":"TKDE"},{"issue":"9","key":"868_CR28","first-page":"1902","volume":"15","author":"L Gagliardelli","year":"2022","unstructured":"Gagliardelli, L., Papadakis, G., Simonini, G., Bergamaschi, S., Palpanas, T.: Generalized supervised meta-blocking. PVLDB 15(9), 1902\u20131910 (2022)","journal-title":"PVLDB"},{"issue":"12","key":"868_CR29","first-page":"1173","volume":"9","author":"G Simonini","year":"2016","unstructured":"Simonini, G., Bergamaschi, S., Jagadish, H.: BLAST: a loosely schema-aware meta-blocking approach for entity resolution. PVLDB 9(12), 1173\u20131184 (2016)","journal-title":"PVLDB"},{"key":"868_CR30","unstructured":"Gravano, L., et\u00a0al.: Approximate string joins in a database (almost) for free. In: VLDB, pp. 491\u2013500 (2001)"},{"key":"868_CR31","doi-asserted-by":"crossref","unstructured":"Augsten, N., B\u00f6hlen, M.H.: Similarity Joins in Relational Database Systems. Morgan & Claypool (2013)","DOI":"10.1007\/978-3-031-01851-0"},{"key":"868_CR32","unstructured":"Augsten, N.: A roadmap towards declarative similarity queries. In: EDBT, pp. 509\u2013512 (2018)"},{"key":"868_CR33","unstructured":"Silva, Y., et\u00a0al.: Similarity queries: their conceptual evaluation, transformations, and processing. VLDB J. (2013)"},{"key":"868_CR34","doi-asserted-by":"crossref","unstructured":"Bayardo, R.J., Ma, Y., Srikant, R.: Scaling up all pairs similarity search. In: WWW, pp. 131\u2013140 (2007)","DOI":"10.1145\/1242572.1242591"},{"key":"868_CR35","doi-asserted-by":"crossref","unstructured":"Chaudhuri, S. et\u00a0al.: A primitive operator for similarity joins in data cleaning. In: ICDE (2006)","DOI":"10.1109\/ICDE.2006.9"},{"issue":"1","key":"868_CR36","first-page":"1","volume":"6","author":"P Bouros","year":"2012","unstructured":"Bouros, P., Ge, S., Mamoulis, N.: Spatio-textual similarity joins. PVLDB 6(1), 1\u201312 (2012)","journal-title":"PVLDB"},{"issue":"4","key":"868_CR37","doi-asserted-by":"publisher","first-page":"360","DOI":"10.14778\/2856318.2856330","volume":"9","author":"D Deng","year":"2015","unstructured":"Deng, D., Li, G., Wen, H., Feng, J.: An efficient partition based method for exact set similarity joins. Proc. VLDB Endow. 9(4), 360\u2013371 (2015)","journal-title":"Proc. VLDB Endow."},{"key":"868_CR38","doi-asserted-by":"crossref","unstructured":"Deng, D., Tao, Y., Li, G.: Overlap set similarity joins with theoretical guarantees. In: SIGMOD (2018)","DOI":"10.1145\/3183713.3183748"},{"key":"868_CR39","doi-asserted-by":"crossref","unstructured":"Zhu, E., Deng, D., Nargesian, F., Miller, R.J.: JOSIE: overlap set similarity search for finding joinable tables in data lakes. In: SIGMOD, pp. 847\u2013864 (2019)","DOI":"10.1145\/3299869.3300065"},{"key":"868_CR40","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X., Yu, J.X., Wang, G.: Efficient similarity joins for near-duplicate detection. ACM Trans. Database Syst. 36(3), 15:1\u201315:41 (2011)","DOI":"10.1145\/2000824.2000825"},{"key":"868_CR41","doi-asserted-by":"crossref","unstructured":"Li, C., Lu, J., Lu, Y.: Efficient merging and filtering algorithms for approximate string searches. In: ICDE, pp. 257\u2013266 (2008)","DOI":"10.1109\/ICDE.2008.4497434"},{"key":"868_CR42","doi-asserted-by":"crossref","unstructured":"Kocher, D., Augsten, N.: A scalable index for top-k subtree similarity queries. In: SIGMOD (2019)","DOI":"10.1145\/3299869.3319892"},{"issue":"1","key":"868_CR43","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1145\/375360.375365","volume":"33","author":"G Navarro","year":"2001","unstructured":"Navarro, G.: A guided tour to approximate string matching. ACM Comput. Surv. 33(1), 31\u201388 (2001)","journal-title":"ACM Comput. Surv."},{"issue":"3","key":"868_CR44","first-page":"253","volume":"5","author":"G Li","year":"2011","unstructured":"Li, G., et al.: PASS-JOIN: a partition-based method for similarity joins. PVLDB 5(3), 253\u2013264 (2011)","journal-title":"PVLDB"},{"key":"868_CR45","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X., Shang, H.: Top-k set similarity joins. In: ICDE, pp. 916\u2013927 (2009)","DOI":"10.1109\/ICDE.2009.111"},{"key":"868_CR46","doi-asserted-by":"crossref","unstructured":"Yang, Z., Zheng, B., Li, G., Zhao, X., Zhou, X., Jensen, C.S.: Adaptive top-k overlap set similarity joins. In: ICDE, pp. 1081\u20131092 (2020)","DOI":"10.1109\/ICDE48307.2020.00098"},{"key":"868_CR47","unstructured":"Broder, A.Z.: On the resemblance and containment of documents. In: Sequences, pp. 21\u201329 (1997)"},{"key":"868_CR48","doi-asserted-by":"crossref","unstructured":"Leskovec, J., Rajaraman, A., Ullman, J.D.: Mining of Massive Data Sets. Cambridge University Press (2020)","DOI":"10.1017\/9781108684163"},{"key":"868_CR49","doi-asserted-by":"crossref","unstructured":"Indyk, P., Motwani, R.: Approximate nearest neighbors: towards removing the curse of dimensionality. In: STOC, pp. 604\u2013613 (1998)","DOI":"10.1145\/276698.276876"},{"key":"868_CR50","doi-asserted-by":"crossref","unstructured":"Fisichella, M., Deng, F., Nejdl, W.: Efficient incremental near duplicate detection based on locality sensitive hashing. In: DEXA, pp. 152\u2013166 (2010)","DOI":"10.1007\/978-3-642-15364-8_11"},{"key":"868_CR51","doi-asserted-by":"crossref","unstructured":"Zhang, W., Wei, H., Sisman, B., Dong, X.L., Faloutsos, C., Page, D.: Autoblock: a hands-off blocking framework for entity matching. In: WSDM, pp. 744\u2013752 (2020)","DOI":"10.1145\/3336191.3371813"},{"key":"868_CR52","doi-asserted-by":"crossref","unstructured":"Ebraheem, M. et\u00a0al.: Distributed representations of tuples for entity resolution. PVLDB, pp. 1454\u20131467 (2018)","DOI":"10.14778\/3236187.3269461"},{"key":"868_CR53","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. Trans. Assoc. Comput. Linguist. 5, 135\u2013146 (2017)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"868_CR54","doi-asserted-by":"crossref","unstructured":"Mudgal, S. et\u00a0al.: Deep learning for entity matching: a design space exploration. In: SIGMOD, pp. 19\u201334 (2018)","DOI":"10.1145\/3183713.3196926"},{"key":"868_CR55","doi-asserted-by":"crossref","unstructured":"Charikar, M.\u00a0S.: Similarity estimation techniques from rounding algorithms. In: STOC, pp. 380\u2013388 (2002)","DOI":"10.1145\/509907.509965"},{"key":"868_CR56","unstructured":"Nelson , B. et\u00a0al.: Multiprobe-lsh. https:\/\/github.com\/gopalmenon\/Multi-Probe-LSH (2018)"},{"key":"868_CR57","doi-asserted-by":"crossref","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with gpus. IEEE Trans Big Data (2021)","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"868_CR58","unstructured":"Guo, R., et\u00a0al.: Accelerating large-scale inference with anisotropic vector quantization. In: ICML (2020)"},{"issue":"1","key":"868_CR59","doi-asserted-by":"publisher","first-page":"484","DOI":"10.14778\/1920841.1920904","volume":"3","author":"H K\u00f6pcke","year":"2010","unstructured":"K\u00f6pcke, H., Thor, A., Rahm, E.: Evaluation of entity resolution approaches on real-world match problems. Proc. VLDB Endow. 3(1), 484\u2013493 (2010)","journal-title":"Proc. VLDB Endow."},{"key":"868_CR60","unstructured":"Obraczka, D., Schuchart, J., Rahm, E.: Embedding-assisted entity resolution for knowledge graphs. In: ESWC, vol. 2873 (2021)"},{"issue":"6","key":"868_CR61","doi-asserted-by":"publisher","first-page":"908","DOI":"10.1016\/j.is.2012.11.008","volume":"38","author":"B Kenig","year":"2013","unstructured":"Kenig, B., Gal, A.: Mfiblocks: an effective blocking algorithm for entity resolution. Inf. Syst. 38(6), 908\u2013926 (2013)","journal-title":"Inf. Syst."},{"key":"868_CR62","unstructured":"Andoni, A., Indyk, P., Laarhoven, T., Razenshteyn, I.P., Schmidt, L.: Practical and optimal LSH for angular distance. In: NIPS, pp. 1225\u20131233 (2015)"},{"issue":"1","key":"868_CR63","doi-asserted-by":"publisher","first-page":"31","DOI":"10.14778\/3485450.3485455","volume":"15","author":"A Jain","year":"2021","unstructured":"Jain, A., Sarawagi, S., Sen, P.: Deep indexed active learning for matching heterogeneous entity representations. Proc. VLDB Endow. 15(1), 31\u201345 (2021)","journal-title":"Proc. VLDB Endow."}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-024-00868-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00778-024-00868-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-024-00868-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,17]],"date-time":"2024-08-17T12:07:18Z","timestamp":1723896438000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00778-024-00868-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,9]]},"references-count":63,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["868"],"URL":"https:\/\/doi.org\/10.1007\/s00778-024-00868-7","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,7,9]]},"assertion":[{"value":"2 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 June 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 July 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}