{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T08:27:20Z","timestamp":1768292840851,"version":"3.49.0"},"reference-count":83,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100018693","name":"HORIZON EUROPE Framework Programme","doi-asserted-by":"publisher","award":["101070122"],"award-info":[{"award-number":["101070122"]}],"id":[{"id":"10.13039\/100018693","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100018693","name":"HORIZON EUROPE Framework Programme","doi-asserted-by":"publisher","award":["101070122"],"award-info":[{"award-number":["101070122"]}],"id":[{"id":"10.13039\/100018693","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100018693","name":"HORIZON EUROPE Framework Programme","doi-asserted-by":"publisher","award":["101070122"],"award-info":[{"award-number":["101070122"]}],"id":[{"id":"10.13039\/100018693","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013209","name":"Hellenic Foundation for Research and Innovation","doi-asserted-by":"publisher","award":["HFRI-FM17-2351"],"award-info":[{"award-number":["HFRI-FM17-2351"]}],"id":[{"id":"10.13039\/501100013209","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["The VLDB Journal"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s00778-024-00879-4","type":"journal-article","created":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T08:54:37Z","timestamp":1733302477000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["An in-depth analysis of pre-trained embeddings for entity resolution"],"prefix":"10.1007","volume":"34","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5336-8157","authenticated-orcid":false,"given":"Alexandros","family":"Zeakis","sequence":"first","affiliation":[]},{"given":"George","family":"Papadakis","sequence":"additional","affiliation":[]},{"given":"Dimitrios","family":"Skoutas","sequence":"additional","affiliation":[]},{"given":"Manolis","family":"Koubarakis","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,4]]},"reference":[{"issue":"6","key":"879_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3418896","volume":"53","author":"V Christophides","year":"2021","unstructured":"Christophides, V., Efthymiou, V., Palpanas, T., Papadakis, G., Stefanidis, K.: An overview of end-to-end entity resolution for big data. ACM CSUR 53(6), 1\u201342 (2021)","journal-title":"ACM CSUR"},{"issue":"11","key":"879_CR2","first-page":"1188","volume":"6","author":"XL Dong","year":"2013","unstructured":"Dong, X.L., Srivastava, D.: Big data integration. PVLDB 6(11), 1188\u20131189 (2013)","journal-title":"PVLDB"},{"key":"879_CR3","doi-asserted-by":"crossref","unstructured":"Christophides, V., Efthymiou, V., Stefanidis, K.: Entity Resolution in the Web of Data. Morgan & Claypool (2015)","DOI":"10.1007\/978-3-031-79468-1"},{"key":"879_CR4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31164-2","volume-title":"Data Matching","author":"P Christen","year":"2012","unstructured":"Christen, P.: Data Matching. Springer, Berlin (2012)"},{"issue":"12","key":"879_CR5","first-page":"2018","volume":"5","author":"L Getoor","year":"2012","unstructured":"Getoor, L., Machanavajjhala, A.: Entity resolution: theory, practice & open challenges. PVLDB 5(12), 2018\u20132019 (2012)","journal-title":"PVLDB"},{"issue":"2","key":"879_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3377455","volume":"53","author":"G Papadakis","year":"2021","unstructured":"Papadakis, G., Skoutas, D., Thanos, E., Palpanas, T.: Blocking and filtering techniques for entity resolution: a survey. ACM CSUR 53(2), 1\u201342 (2021)","journal-title":"ACM CSUR"},{"key":"879_CR7","doi-asserted-by":"crossref","unstructured":"Papadakis, G., Ioannou, E., Thanos, E., Palpanas, T.: The Four Generations of Entity Resolution. Morgan & Claypool (2021)","DOI":"10.1007\/978-3-031-01878-7"},{"key":"879_CR8","doi-asserted-by":"crossref","unstructured":"Pilehvar, M.T., Camacho-Collados, J.: Embeddings in Natural Language Processing. Morgan & Claypool (2020)","DOI":"10.1007\/978-3-031-02177-0"},{"issue":"11","key":"879_CR9","first-page":"2459","volume":"14","author":"S Thirumuruganathan","year":"2021","unstructured":"Thirumuruganathan, S., Li, H., Tang, N., Ouzzani, M., Govind, Y., Paulsen, D., Fung, G., Doan, A.: Deep learning for blocking in entity matching: a design space exploration. PVLDB 14(11), 2459\u20132472 (2021)","journal-title":"PVLDB"},{"key":"879_CR10","doi-asserted-by":"crossref","unstructured":"Mudgal, S., Li, H., Rekatsinas, T., Doan, A., Park, Y., Krishnan, G., Deep, R., Arcaute, E., Raghavendra, V.: Deep learning for entity matching: a design space exploration. In: SIGMOD, pp. 19\u201334 (2018)","DOI":"10.1145\/3183713.3196926"},{"key":"879_CR11","unstructured":"Brunner, U., Stockinger, K.: Entity matching with transformer architectures\u2014a step forward in data integration. In: EDBT, pp. 463\u2013473 (2020)"},{"issue":"11","key":"879_CR12","first-page":"1454","volume":"11","author":"M Ebraheem","year":"2018","unstructured":"Ebraheem, M., Thirumuruganathan, S., Joty, S.R., Ouzzani, M., Tang, N.: Distributed representations of tuples for entity resolution. PVLDB 11(11), 1454\u20131467 (2018)","journal-title":"PVLDB"},{"issue":"3","key":"879_CR13","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2021","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with gpus. IEEE Trans. Big Data 7(3), 535\u2013547 (2021)","journal-title":"IEEE Trans. Big Data"},{"issue":"1","key":"879_CR14","first-page":"1","volume":"1","author":"J Tu","year":"2023","unstructured":"Tu, J., Fan, J., Tang, N., Wang, P., Li, G., Du, X., Jia, X., Gao, S.: Unicorn: a unified multi-tasking model for supporting matching tasks in data integration. SIGMOD 1(1), 1\u201326 (2023)","journal-title":"SIGMOD"},{"issue":"1","key":"879_CR15","doi-asserted-by":"publisher","first-page":"50","DOI":"10.14778\/3421424.3421431","volume":"14","author":"Y Li","year":"2020","unstructured":"Li, Y., Li, J., Suhara, Y., Doan, A., Tan, W.: Deep entity matching with pre-trained language models. Proc. VLDB Endow. 14(1), 50\u201360 (2020)","journal-title":"Proc. VLDB Endow."},{"issue":"6","key":"879_CR16","doi-asserted-by":"publisher","first-page":"1369","DOI":"10.1007\/s00778-023-00791-3","volume":"32","author":"G Papadakis","year":"2023","unstructured":"Papadakis, G., Efthymiou, V., Thanos, E., Hassanzadeh, O., Christen, P.: An analysis of one-to-one matching algorithms for entity resolution. VLDB J. 32(6), 1369\u20131400 (2023)","journal-title":"VLDB J."},{"key":"879_CR17","doi-asserted-by":"crossref","unstructured":"Zhang, W., Wei, H., Sisman, B., Dong, X.L., Faloutsos, C., Page, D.: Autoblock: a hands-off blocking framework for entity matching. In: WSDM, pp. 744\u2013752 (2020)","DOI":"10.1145\/3336191.3371813"},{"key":"879_CR18","doi-asserted-by":"crossref","unstructured":"Nie, H., Han, X., He, B., Sun, L., Chen, B., Zhang, W., Wu, S., Kong, H.: Deep sequence-to-sequence entity matching for heterogeneous entity resolution. In: CIKM, pp. 629\u2013638 (2019)","DOI":"10.1145\/3357384.3358018"},{"key":"879_CR19","doi-asserted-by":"crossref","unstructured":"Li, B., Wang, W., Sun, Y., Zhang, L., Ali, M.A., Wang, Y.: Grapher: token-centric entity resolution with graph convolutional neural networks. In: IAAI, pp. 8172\u20138179 (2020)","DOI":"10.1609\/aaai.v34i05.6330"},{"key":"879_CR20","doi-asserted-by":"crossref","unstructured":"Wang, Z., Sisman, B., Wei, H., Dong, X.L., Ji, S.: Cordel: a contrastive deep learning approach for entity linkage. In: ICDM, pp. 1322\u20131327 (2020)","DOI":"10.1109\/ICDM50108.2020.00171"},{"key":"879_CR21","doi-asserted-by":"crossref","unstructured":"Zhang, D., Nie, Y., Wu, S., Shen, Y., Tan, K.: Multi-context attention for entity matching. In: WWW, pp. 2634\u20132640 (2020)","DOI":"10.1145\/3366423.3380017"},{"key":"879_CR22","doi-asserted-by":"crossref","unstructured":"Fu, C., Han, X., He, J., Sun, L.: Hierarchical matching network for heterogeneous entity resolution. In: IJCAI, pp. 3665\u20133671 (2020)","DOI":"10.24963\/ijcai.2020\/507"},{"key":"879_CR23","doi-asserted-by":"crossref","unstructured":"Yao, Z., Li, C., Dong, T., Lv, X., Yu, J., Hou, L., Li, J., Zhang, Y., Dai, Z.: Interpretable and low-resource entity matching via decoupling feature learning from decision making. In: ACL\/IJCNLP, pp. 2770\u20132781 (2021)","DOI":"10.18653\/v1\/2021.acl-long.215"},{"key":"879_CR24","first-page":"1913","volume":"14","author":"R Peeters","year":"2021","unstructured":"Peeters, R., Bizer, C.: Dual-objective fine-tuning of BERT for entity matching. PVLDB 14, 1913\u20131921 (2021)","journal-title":"PVLDB"},{"key":"879_CR25","unstructured":"Paganelli, M., Del\u00a0Buono, F., Marco, P., Guerra, F., Vincini, M.: Automated machine learning for entity matching tasks. In: EDBT, pp. 325\u2013330 (2021)"},{"key":"879_CR26","doi-asserted-by":"crossref","unstructured":"Chen, R., Shen, Y., Zhang, Y.: GNEM: a generic one-to-set neural entity matching framework. In: WWW, pp. 1686\u20131694 (2020)","DOI":"10.1145\/3442381.3450119"},{"issue":"1","key":"879_CR27","first-page":"50","volume":"14","author":"Y Li","year":"2020","unstructured":"Li, Y., Li, J., Suhara, Y., Doan, A., Tan, W.: Deep entity matching with pre-trained language models. PVLDB 14(1), 50\u201360 (2020)","journal-title":"PVLDB"},{"key":"879_CR28","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: Glove: global vectors for word representation. In: EMNLP, pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"879_CR29","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. In: ICLR (Workshop Poster) (2013)"},{"key":"879_CR30","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. NeurIPS, vol.\u00a026 (2013)"},{"key":"879_CR31","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. TACL 5, 135\u2013146 (2017)","journal-title":"TACL"},{"key":"879_CR32","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT (1). Association for Computational Linguistics, pp. 4171\u20134186 (2019)"},{"key":"879_CR33","unstructured":"Lan, Z., Chen, M., Goodman, S., Gimpel, K., Sharma, P., Soricut, R.: ALBERT: a lite BERT for self-supervised learning of language representations. In: ICLR. OpenReview.net (2020)"},{"key":"879_CR34","unstructured":"Liu, Y., Ott, M., Goyal, N., Du, J., Joshi, M., Chen, D., Levy, O., Lewis, M., Zettlemoyer, L., Stoyanov, V.: Roberta: a robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"879_CR35","unstructured":"Sanh, V., Debut, L., Chaumond, J., Wolf, T.: Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)"},{"key":"879_CR36","unstructured":"Yang, Z., Dai, Z., Yang, Y., Carbonell, J., Salakhutdinov, R.R., Le, Q.V.: Xlnet: generalized autoregressive pretraining for language understanding. NeurIPS, vol.\u00a032 (2019)"},{"key":"879_CR37","first-page":"16857","volume":"33","author":"K Song","year":"2020","unstructured":"Song, K., Tan, X., Qin, T., Lu, J., Liu, T.-Y.: Mpnet: masked and permuted pre-training for language understanding. NeurIPS 33, 16857\u201316867 (2020)","journal-title":"NeurIPS"},{"issue":"140","key":"879_CR38","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., Shazeer, N., Roberts, A., Lee, K., Narang, S., Matena, M., Zhou, Y., Li, W., Liu, P.J.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(140), 1\u201367 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"879_CR39","first-page":"5776","volume":"33","author":"W Wang","year":"2020","unstructured":"Wang, W., Wei, F., Dong, L., Bao, H., Yang, N., Zhou, M.: Minilm: deep self-attention distillation for task-agnostic compression of pre-trained transformers. NeurIPS 33, 5776\u20135788 (2020)","journal-title":"NeurIPS"},{"key":"879_CR40","unstructured":"Peeters, R., Bizer, C.: Entity matching using large language models. arXiv preprint arXiv:2310.11244 (2023)"},{"issue":"9","key":"879_CR41","doi-asserted-by":"publisher","first-page":"2225","DOI":"10.14778\/3598581.3598594","volume":"16","author":"A Zeakis","year":"2023","unstructured":"Zeakis, A., Papadakis, G., Skoutas, D., Koubarakis, M.: Pre-trained embeddings for entity resolution: an experimental analysis. Proc. VLDB Endow. 16(9), 2225\u20132238 (2023)","journal-title":"Proc. VLDB Endow."},{"issue":"4","key":"879_CR42","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1145\/3584014.3584017","volume":"22","author":"JB Mugeni","year":"2023","unstructured":"Mugeni, J.B., Amagasa, T.: A graph-based blocking approach for entity matching using contrastively learned embeddings. ACM SIGAPP Appl. Comput. Rev. 22(4), 37\u201346 (2023)","journal-title":"ACM SIGAPP Appl. Comput. Rev."},{"issue":"6","key":"879_CR43","first-page":"1507","volume":"16","author":"D Paulsen","year":"2023","unstructured":"Paulsen, D., Govind, Y., Doan, A.: Sparkly: a simple yet surprisingly strong tf\/idf blocker for entity matching. PVLDB 16(6), 1507\u20131519 (2023)","journal-title":"PVLDB"},{"key":"879_CR44","doi-asserted-by":"crossref","unstructured":"Papadakis, G., Fisichella, M., Schoger, F., Mandilaras, G., Augsten, N., Nejdl, W.: Benchmarking filtering techniques for entity resolution. In: ICDE, pp. 653\u2013666 (2023)","DOI":"10.1109\/ICDE55515.2023.00389"},{"key":"879_CR45","doi-asserted-by":"crossref","unstructured":"Brinkmann, A., Shraga, R., Bizer, C.: Sc-block: supervised contrastive blocking within entity resolution pipelines. In: ESWC, pp. 121\u2013142 (2024)","DOI":"10.1007\/978-3-031-60626-7_7"},{"key":"879_CR46","doi-asserted-by":"crossref","unstructured":"Wu, R., Chaba, S., Sawlani, S., Chu, X., Thirumuruganathan, S.: Zeroer: entity resolution using zero labeled examples. In: SIGMOD, pp. 1149\u20131164 (2020)","DOI":"10.1145\/3318464.3389743"},{"issue":"12","key":"879_CR47","first-page":"12139","volume":"35","author":"C Ge","year":"2021","unstructured":"Ge, C., Wang, P., Chen, L., Liu, X., Zheng, B., Gao, Y.: Collaborem: a self-supervised entity matching framework using multi-features collaboration. TKDE 35(12), 12139\u201312152 (2021)","journal-title":"TKDE"},{"key":"879_CR48","doi-asserted-by":"crossref","unstructured":"Peeters, R., Bizer, C.: Using chatgpt for entity matching. In: European Conference on Advances in Databases and Information Systems, pp. 221\u2013230 (2023)","DOI":"10.1007\/978-3-031-42941-5_20"},{"issue":"4","key":"879_CR49","doi-asserted-by":"publisher","first-page":"738","DOI":"10.14778\/3574245.3574258","volume":"16","author":"A Narayan","year":"2022","unstructured":"Narayan, A., Chami, I., Orr, L.J., R\u00e9, C.: Can foundation models wrangle your data? Proc. VLDB Endow. 16(4), 738\u2013746 (2022)","journal-title":"Proc. VLDB Endow."},{"key":"879_CR50","unstructured":"Zhang, H., Dong, Y., Xiao, C., Oyamada, M.: Jellyfish: a large language model for data preprocessing. arXiv preprint arXiv:2312.01678 (2023)"},{"key":"879_CR51","first-page":"248","volume":"2022","author":"R Peeters","year":"2022","unstructured":"Peeters, R., Bizer, C.: Supervised contrastive learning for product matching. Companion Proc. Web Conf. 2022, 248\u2013251 (2022)","journal-title":"Companion Proc. Web Conf."},{"key":"879_CR52","doi-asserted-by":"crossref","unstructured":"Wang, R., Li, Y., Wang, J.: Sudowoodo: Contrastive self-supervised learning for multi-purpose data integration and preparation. In ICDE , pp. 1502\u20131515 (2023)","DOI":"10.1109\/ICDE55515.2023.00391"},{"key":"879_CR53","doi-asserted-by":"crossref","unstructured":"Yao, D., Gu, Y., Cong, G., Jin, H., Lv, X.: Entity resolution with hierarchical graph attention networks. In: SIGMOD, pp. 429\u2013442 (2022)","DOI":"10.1145\/3514221.3517872"},{"key":"879_CR54","doi-asserted-by":"crossref","unstructured":"Ni, J., Qu, C., Lu, J., Dai, Z., \u00c1brego, G.H., Ma, J., Zhao, V.Y., Luan, Y., Hall, K.B., Chang, M., Yang, Y.: Large dual encoders are generalizable retrievers. In: EMNLP. Association for Computational Linguistics, pp. 9844\u20139855 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.669"},{"issue":"8","key":"879_CR55","first-page":"1726","volume":"15","author":"M Paganelli","year":"2022","unstructured":"Paganelli, M., Buono, F.D., Baraldi, A., Guerra, F.: Analyzing how BERT performs entity matching. PVLDB 15(8), 1726\u20131738 (2022)","journal-title":"PVLDB"},{"key":"879_CR56","unstructured":"Liu, Q., Kusner, M.J., Blunsom, P.: A survey on contextual embeddings. CoRR, vol. abs\/2003.07278 (2020)"},{"issue":"12","key":"879_CR57","first-page":"3770","volume":"15","author":"I Trummer","year":"2022","unstructured":"Trummer, I.: From BERT to GPT-3 codex: harnessing the potential of very large language models for data management. PVLDB 15(12), 3770\u20133773 (2022)","journal-title":"PVLDB"},{"key":"879_CR58","doi-asserted-by":"crossref","unstructured":"Cer, D., Diab, M., Agirre, E., Lopez-Gazpio, I., Specia, L.: Semeval-2017 task 1: semantic textual similarity-multilingual and cross-lingual focused evaluation. arXiv preprint arXiv:1708.00055 (2017)","DOI":"10.18653\/v1\/S17-2001"},{"key":"879_CR59","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: GLUE: a multi-task benchmark and analysis platform for natural language understanding. In: BlackboxNLP@EMNLP. Association for Computational Linguistics, pp. 353\u2013355 (2018)","DOI":"10.18653\/v1\/W18-5446"},{"key":"879_CR60","doi-asserted-by":"crossref","unstructured":"Akbarian\u00a0Rastaghi, M., Kamalloo, E., Rafiei, D.: Probing the robustness of pre-trained language models for entity matching. In: CIKM, pp. 3786\u20133790 (2022)","DOI":"10.1145\/3511808.3557673"},{"key":"879_CR61","unstructured":"Peeters, R., Der, R.C., Bizer, C.: WDC products: a multi-dimensional entity matching benchmark. In: EDBT. OpenProceedings.org, pp. 22\u201333 (2024)"},{"key":"879_CR62","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. NeurIPS, vol.\u00a030 (2017)"},{"key":"879_CR63","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. NeurIPS, vol.\u00a027 (2014)"},{"key":"879_CR64","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. In: ICLR (2015)"},{"key":"879_CR65","unstructured":"Hinton, G., Vinyals, O., Dean, J., et\u00a0al.: Distilling the knowledge in a neural network, vol.\u00a02, no.\u00a07, arXiv preprint arXiv:1503.02531 (2015)"},{"key":"879_CR66","unstructured":"Romero, A., Ballas, N., Kahou, S.E., Chassang, A., Gatta, C., Bengio, Y.: Fitnets: Hints for thin deep nets. In: ICLR (Poster) (2015)"},{"key":"879_CR67","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-bert: sentence embeddings using siamese bert-networks. In: EMNLP\/IJCNLP (1). Association for Computational Linguistics, pp. 3980\u20133990 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"879_CR68","doi-asserted-by":"crossref","unstructured":"Jiao, X., Yin, Y., Shang, L., Jiang, X., Chen, X., Li, L., Wang, F., Liu, Q.: Tinybert: distilling BERT for natural language understanding. In: EMNLP (Findings), ser. Findings of ACL, vol. EMNLP 2020. Association for Computational Linguistics, pp. 4163\u20134174 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"879_CR69","doi-asserted-by":"crossref","unstructured":"Sun, Z., Yu, H., Song, X., Liu, R., Yang, Y., Zhou, D.: Mobilebert: a compact task-agnostic BERT for resource-limited devices, pp. 2158\u20132170 (2020)","DOI":"10.18653\/v1\/2020.acl-main.195"},{"issue":"1","key":"879_CR70","first-page":"484","volume":"3","author":"H K\u00f6pcke","year":"2010","unstructured":"K\u00f6pcke, H., Thor, A., Rahm, E.: Evaluation of entity resolution approaches on real-world match problems. PVLDB 3(1), 484\u2013493 (2010)","journal-title":"PVLDB"},{"key":"879_CR71","unstructured":"Obraczka, D., Schuchart, J., Rahm, E.: EAGER: embedding-assisted entity resolution for knowledge graphs. CoRR, vol. abs\/2101.06126 (2021)"},{"key":"879_CR72","doi-asserted-by":"crossref","unstructured":"Papadakis, G., Ioannou, E., Nieder\u00e9e, C., Fankhauser, P.: Efficient entity resolution for large heterogeneous information spaces. In: WSDM, pp. 535\u2013544 (2011)","DOI":"10.1145\/1935826.1935903"},{"issue":"9","key":"879_CR73","first-page":"684","volume":"9","author":"G Papadakis","year":"2016","unstructured":"Papadakis, G., Svirsky, J., Gal, A., Palpanas, T.: Comparative analysis of approximate blocking techniques for entity resolution. PVLDB 9(9), 684\u2013695 (2016)","journal-title":"PVLDB"},{"issue":"6","key":"879_CR74","doi-asserted-by":"publisher","first-page":"908","DOI":"10.1016\/j.is.2012.11.008","volume":"38","author":"B Kenig","year":"2013","unstructured":"Kenig, B., Gal, A.: Mfiblocks: an effective blocking algorithm for entity resolution. Inf. Syst. 38(6), 908\u2013926 (2013)","journal-title":"Inf. Syst."},{"issue":"9","key":"879_CR75","first-page":"1537","volume":"24","author":"P Christen","year":"2012","unstructured":"Christen, P.: A survey of indexing techniques for scalable record linkage and deduplication. TKDE 24(9), 1537\u20131555 (2012)","journal-title":"TKDE"},{"key":"879_CR76","doi-asserted-by":"crossref","unstructured":"Christen, P.: \u201cFebrl -: an open source data cleaning, deduplication and record linkage system with a graphical user interface,\u201d in SIGKDD, pp. 1065\u20131068","DOI":"10.1145\/1401890.1402020"},{"key":"879_CR77","doi-asserted-by":"crossref","unstructured":"Papadakis, G., Kirielle, N., Christen, P., Palpanas, T.: A critical re-evaluation of benchmark datasets for (deep) learning-based matching algorithms. In: ICDE, pp. 3435\u20133448 (2024)","DOI":"10.1109\/ICDE60146.2024.00265"},{"issue":"4","key":"879_CR78","first-page":"312","volume":"9","author":"G Papadakis","year":"2015","unstructured":"Papadakis, G., Alexiou, G., Papastefanatos, G., Koutrika, G.: Schema-agnostic vs schema-based configurations for blocking methods on homogeneous data. PVLDB 9(4), 312\u2013323 (2015)","journal-title":"PVLDB"},{"issue":"8","key":"879_CR79","first-page":"1475","volume":"32","author":"W Li","year":"2019","unstructured":"Li, W., Zhang, Y., Sun, Y., Wang, W., Li, M., Zhang, W., Lin, X.: Approximate nearest neighbor search on high dimensional data-experiments, analyses, and improvement. TKDE 32(8), 1475\u20131488 (2019)","journal-title":"TKDE"},{"issue":"4","key":"879_CR80","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2018.2889473","volume":"42","author":"YA Malkov","year":"2020","unstructured":"Malkov, Y.A., Yashunin, D.A.: Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE Trans. Pattern Anal. Mach. Intell. 42(4), 824\u2013836 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"879_CR81","doi-asserted-by":"crossref","unstructured":"Lacoste-Julien, S., Palla, K., Davies, A., Kasneci, G., Graepel, T., Ghahramani, Z.: Sigma: simple greedy matching for aligning large knowledge bases. In: KDD, pp. 572\u2013580 (2013)","DOI":"10.1145\/2487575.2487592"},{"issue":"12","key":"879_CR82","doi-asserted-by":"publisher","first-page":"1197","DOI":"10.14778\/2994509.2994535","volume":"9","author":"P Konda","year":"2016","unstructured":"Konda, P., et al.: Magellan: toward building entity matching management systems. Proc. VLDB Endow. 9(12), 1197\u20131208 (2016)","journal-title":"Proc. VLDB Endow."},{"issue":"4","key":"879_CR83","first-page":"790","volume":"16","author":"A Zeakis","year":"2022","unstructured":"Zeakis, A., Skoutas, D., Sacharidis, D., Papapetrou, O., Koubarakis, M.: TokenJoin: efficient filtering for set similarity join with maximumweighted bipartite matching. PVLDB 16(4), 790\u2013802 (2022)","journal-title":"PVLDB"}],"container-title":["The VLDB Journal"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-024-00879-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00778-024-00879-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00778-024-00879-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T05:52:17Z","timestamp":1737957137000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00778-024-00879-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,4]]},"references-count":83,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["879"],"URL":"https:\/\/doi.org\/10.1007\/s00778-024-00879-4","relation":{},"ISSN":["1066-8888","0949-877X"],"issn-type":[{"value":"1066-8888","type":"print"},{"value":"0949-877X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,4]]},"assertion":[{"value":"10 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 October 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 December 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"5"}}