{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T06:31:32Z","timestamp":1763706692927,"version":"3.40.3"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031466601"},{"type":"electronic","value":"9783031466618"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-46661-8_16","type":"book-chapter","created":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T13:02:29Z","timestamp":1699102949000},"page":"228-244","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Exploring the Design Space of Unsupervised Blocking with Pre-trained Language Models in Entity Resolution"],"prefix":"10.1007","author":[{"given":"Chenchen","family":"Sun","sequence":"first","affiliation":[]},{"given":"Yuyuan","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Derong","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Tiezheng","family":"Nie","sequence":"additional","affiliation":[]},{"given":"Xite","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,5]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Christen, P.: A survey of indexing techniques for scalable record linkage and deduplication. IEEE Trans. Knowl. Data Eng. 24(9), 1537\u20131555 (2011)","DOI":"10.1109\/TKDE.2011.127"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Ebraheem, M., Thirumuruganathan, S., Joty, S., Ouzzani, M., Tang, N.: Distributed representations of tuples for entity resolution. Proc. VLDB Endowment 11(11), 1454\u20131467 (2018)","DOI":"10.14778\/3236187.3269461"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Li, B., Miao, Y., Wang, Y., Sun, Y., Wang, W.: Improving the efficiency and effectiveness for BERT-based entity resolution. In:\u00a0Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, no. 15, pp. 13226\u201313233 (2021)","DOI":"10.1609\/aaai.v35i15.17562"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"Zhang, W., Wei, H., Sisman, B., Dong, X. L., Faloutsos, C., Page, D.: AutoBlock: a hands-off blocking framework for entity matching. WSDM, pp. 744\u2013752 (2020)","DOI":"10.1145\/3336191.3371813"},{"key":"16_CR5","doi-asserted-by":"crossref","unstructured":"Thirumuruganathan, S., Li, H., Tang, N., Ouzzani, M., Govind, Y., Paulsen, D., Fung, G., Doan, A.: Deep learning for blocking in entity matching: a design space exploration. Proc. VLDB Endow. 14(11), 2459\u20132472 (2021)","DOI":"10.14778\/3476249.3476294"},{"issue":"1","key":"16_CR6","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/s41019-020-00146-w","volume":"6","author":"F Azzalini","year":"2020","unstructured":"Azzalini, F., Jin, S., Renzi, M., Tanca, L.: Blocking techniques for entity linkage: a semantics-based approach. Data Sci. Eng. 6(1), 20\u201338 (2020)","journal-title":"Data Sci. Eng."},{"key":"16_CR7","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"16_CR8","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using siamese BERT-networks. In: EMNLP-IJCNLP, pp. 3980\u20133990 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"16_CR9","unstructured":"Le, Q., Mikolov, T.: Distributed representations of sentences and documents. In: ICML, pp. 1188\u20131196 (2014)"},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. Trans. Assoc. Comput. Linguistics, 5, 135\u2013146 (2017)","DOI":"10.1162\/tacl_a_00051"},{"key":"16_CR11","unstructured":"Han, J., Pei, J., Tong, H.: Data Mining: Concepts and Techniques. Morgan Kaufmann (2022)"},{"issue":"2","key":"16_CR12","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1145\/235968.233324","volume":"25","author":"T Zhang","year":"1996","unstructured":"Zhang, T., Ramakrishnan, R., Livny, M.: BIRCH: an efficient data clustering method for very large databases. ACM SIGMOD Rec. 25(2), 103\u2013114 (1996). https:\/\/doi.org\/10.1145\/235968.233324","journal-title":"ACM SIGMOD Rec."},{"issue":"5814","key":"16_CR13","doi-asserted-by":"publisher","first-page":"972","DOI":"10.1126\/science.1136800","volume":"315","author":"BJ Frey","year":"2007","unstructured":"Frey, B.J., Dueck, D.: Clustering by passing messages between data points. Science 315(5814), 972\u2013976 (2007)","journal-title":"Science"},{"key":"16_CR14","unstructured":"Arora, S., Liang, Y., Ma, T.: A simple but tough-to-beat baseline for sentence embeddings. In: ICLR (Poster) (2017)"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Primpeli, A., Peeters, R., Bizer, C.: The WDC training dataset and gold standard for large-scale product matching. In: Companion Proceedings of the 2019 World Wide Web Conference, pp. 381\u2013386 (2019)","DOI":"10.1145\/3308560.3316609"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Jawahar, G., Sagot, B., Seddah, D.: What does BERT learn about the structure of language?. ACL 1, 3651\u20133657 (2019)","DOI":"10.18653\/v1\/P19-1356"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Clark, K., Khandelwal, U., Levy, O., Manning, C.D.: What does BERT look at? An analysis of bert's attention: BlackboxNLP@ACL, 276\u2013286 (2019)","DOI":"10.18653\/v1\/W19-4828"}],"container-title":["Lecture Notes in Computer Science","Advanced Data Mining and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-46661-8_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T13:04:29Z","timestamp":1699103069000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-46661-8_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031466601","9783031466618"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-46661-8_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"5 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADMA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Data Mining and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shenyang","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 August 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"adma2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/adma2023.uqcloud.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes. Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"503","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"216","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"43% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.97","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.77","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}