{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T00:04:44Z","timestamp":1774569884772,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819722419","type":"print"},{"value":"9789819722426","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-2242-6_16","type":"book-chapter","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T09:02:31Z","timestamp":1713949351000},"page":"194-205","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Class Ratio and\u00a0Its Implications for\u00a0Reproducibility and\u00a0Performance in\u00a0Record Linkage"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3250-605X","authenticated-orcid":false,"given":"Jeremy","family":"Foxcroft","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3435-2015","authenticated-orcid":false,"given":"Peter","family":"Christen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9718-3795","authenticated-orcid":false,"given":"Luiza","family":"Antonie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,25]]},"reference":[{"key":"16_CR1","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/978-3-319-93040-4_8","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"\u00d6 Akg\u00fcn","year":"2018","unstructured":"Akg\u00fcn, \u00d6., Dearle, A., Kirby, G.N.C., Christen, P.: Using metric space indexing for complete and efficient record linkage. In: Phung, D., Tseng, V., Webb, G., Ho, B., Ganji, M., Rashidi, L. (eds.) PAKDD 2018. LNCS, vol. 10939, pp. 89\u2013101. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-93040-4_8"},{"key":"16_CR2","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1007\/978-3-030-16142-2_21","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"IC Anindya","year":"2019","unstructured":"Anindya, I.C., Kantarcioglu, M., Malin, B.: Determining the impact of missing values on blocking in record linkage. In: Yang, Q., Zhou, Z.H., Gong, Z., Zhang, M.L., Huang, S.J. (eds.) PAKDD 2019. LNCS, vol. 11441, pp. 262\u2013274. Springer, Heidelberg (2019). https:\/\/doi.org\/10.1007\/978-3-030-16142-2_21"},{"issue":"1","key":"16_CR3","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L.: Random forests. Mach. Learn. 45(1), 5\u201332 (2001). https:\/\/doi.org\/10.1023\/A:1010933404324","journal-title":"Mach. Learn."},{"key":"16_CR4","doi-asserted-by":"publisher","unstructured":"Brunner, U., Stockinger, K.: Entity matching with transformer architectures - a step forward in data integration. In: Proceedings of the 23rd EDBT (2020). https:\/\/doi.org\/10.21256\/ZHAW-19637","DOI":"10.21256\/ZHAW-19637"},{"key":"16_CR5","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1007\/978-3-319-31753-3_36","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"X Cao","year":"2016","unstructured":"Cao, X., Zheng, Y., Shi, C., Li, J., Wu, B.: Link prediction in schema-rich heterogeneous information network. In: Bailey, J., Khan, L., Washio, T., Dobbie, G., Huang, J., Wang, R. (eds.) PAKDD 2016. LNCS, vol. 9651, pp. 449\u2013460. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-31753-3_36"},{"key":"16_CR6","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1007\/978-3-030-47436-2_30","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"Y Cao","year":"2020","unstructured":"Cao, Y., Peng, H., Yu, P.S.: Multi-information source HIN for medical concept embedding. In: Lauw, H., Wong, R.W., Ntoulas, A., Lim, E.P., Ng, S.K., Pan, S. (eds.) PAKDD 2020. LNCS, vol. 12085, pp. 396\u2013408. Springer, Heidelberg (2020). https:\/\/doi.org\/10.1007\/978-3-030-47436-2_30"},{"key":"16_CR7","series-title":"Data-Centric Systems and Applications","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31164-2","volume-title":"Data Matching - Concepts and Techniques for Record Linkage, Entity Resolution, and Duplicate Detection","author":"P Christen","year":"2012","unstructured":"Christen, P.: Data Matching - Concepts and Techniques for Record Linkage, Entity Resolution, and Duplicate Detection. Data-Centric Systems and Applications, Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-31164-2"},{"issue":"3","key":"16_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3606367","volume":"56","author":"P Christen","year":"2023","unstructured":"Christen, P., Hand, D.J., Kirielle, N.: A review of the F-measure: its history, properties, criticism, and alternatives. ACM Comput. Surv. 56(3), 1\u201324 (2023)","journal-title":"ACM Comput. Surv."},{"key":"16_CR9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59706-1","volume-title":"Linking Sensitive Data","author":"P Christen","year":"2020","unstructured":"Christen, P., Ranbaduge, T., Schnell, R.: Linking Sensitive Data. Springer, Heidelberg (2020). https:\/\/doi.org\/10.1007\/978-3-030-59706-1"},{"issue":"3","key":"16_CR10","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/BF00994018","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes, C., Vapnik, V.: Support-vector networks. Mach. Learn. 20(3), 273\u2013297 (1995). https:\/\/doi.org\/10.1007\/BF00994018","journal-title":"Mach. Learn."},{"key":"16_CR11","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"665","DOI":"10.1007\/978-3-030-46147-8_40","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"S Fakhraei","year":"2019","unstructured":"Fakhraei, S., Mathew, J., Ambite, J.L.: NSEEN: neural semantic embedding for entity normalization. In: Brefeld, U., Fromont, E., Hotho, A., Knobbe, A., Maathuis, M., Robardet, C. (eds.) ECML PKDD 2019. LNCS, vol. 11907, pp. 665\u2013680. Springer, Heidelberg (2019). https:\/\/doi.org\/10.1007\/978-3-030-46147-8_40"},{"issue":"12","key":"16_CR12","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1145\/3458723","volume":"64","author":"T Gebru","year":"2021","unstructured":"Gebru, T., et al.: Datasheets for datasets. Commun. ACM 64(12), 86\u201392 (2021). https:\/\/doi.org\/10.1145\/3458723","journal-title":"Commun. ACM"},{"key":"16_CR13","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1093\/pubmed\/fdx037","volume":"40","author":"R Gilbert","year":"2017","unstructured":"Gilbert, R., et al.: Guild: guidance for information about linking data sets. J. Public Health 40, 191\u2013198 (2017)","journal-title":"J. Public Health"},{"issue":"3","key":"16_CR14","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1007\/s11222-017-9746-6","volume":"28","author":"DJ Hand","year":"2018","unstructured":"Hand, D.J., Christen, P.: A note on using the F-measure for evaluating record linkage algorithms. Stat. Comput. 28(3), 539\u2013547 (2018)","journal-title":"Stat. Comput."},{"key":"16_CR15","doi-asserted-by":"publisher","unstructured":"Harron, K., et al.: Challenges in administrative data linkage for research. Big Data Soc. 4(2) (2017). https:\/\/doi.org\/10.1177\/2053951717745678. pMID: 30381794","DOI":"10.1177\/2053951717745678"},{"key":"16_CR16","series-title":"Springer Series in Statistics","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-21606-5","volume-title":"The Elements of Statistical Learning","author":"T Hastie","year":"2001","unstructured":"Hastie, T., Tibshirani, R., Friedman, J.: The Elements of Statistical Learning. Springer Series in Statistics, Springer, New York (2001). https:\/\/doi.org\/10.1007\/978-0-387-21606-5"},{"key":"16_CR17","doi-asserted-by":"publisher","DOI":"10.1007\/0-387-69505-2","volume-title":"Data Quality and Record Linkage","author":"T Herzog","year":"2007","unstructured":"Herzog, T., Scheuren, F., Winkler, W.: Data Quality and Record Linkage. Springer, New York (2007). https:\/\/doi.org\/10.1007\/0-387-69505-2"},{"key":"16_CR18","doi-asserted-by":"publisher","unstructured":"Kapoor, S., Narayanan, A.: Leakage and the reproducibility crisis in ML-based science (2022). https:\/\/doi.org\/10.48550\/ARXIV.2207.07048","DOI":"10.48550\/ARXIV.2207.07048"},{"key":"16_CR19","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-319-75420-8_1","volume-title":"ACIIDS 2018","author":"N Kooli","year":"2018","unstructured":"Kooli, N., Allesiardo, R., Pigneul, E.: Deep learning based approach for entity resolution in databases. In: Nguyen, N.T., Hoang, D.H., Hong, T., Pham, H., Trawinski, B. (eds.) ACIIDS 2018. LNCS, vol. 10752, pp. 3\u201312. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-75420-8_1"},{"issue":"2","key":"16_CR20","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1016\/j.datak.2009.10.003","volume":"69","author":"H K\u00f6pcke","year":"2010","unstructured":"K\u00f6pcke, H., Rahm, E.: Frameworks for entity matching: a comparison. Data Knowl. Eng. 69(2), 197\u2013210 (2010). https:\/\/doi.org\/10.1016\/j.datak.2009.10.003","journal-title":"Data Knowl. Eng."},{"key":"16_CR21","doi-asserted-by":"publisher","unstructured":"Koumarelas, l., Papenbrock, T., Naumann, F.: Mdedup: duplicate detection with matching dependencies. Proc. VLDB Endow. 13(5), 712\u2013725 (2020). https:\/\/doi.org\/10.14778\/3377369.3377379","DOI":"10.14778\/3377369.3377379"},{"key":"16_CR22","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1007\/978-3-662-44851-9_15","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"ZC Lipton","year":"2014","unstructured":"Lipton, Z.C., Elkan, C., Naryanaswamy, B.: Optimal thresholding of classifiers to maximize F1 measure. In: Calders, T., Esposito, F., H\u00fcllermeier, E., Meo, R. (eds.) ECML PKDD 2014. LNCS, vol. 8725, pp. 225\u2013239. Springer, Heidelberg (2014). https:\/\/doi.org\/10.1007\/978-3-662-44851-9_15"},{"key":"16_CR23","doi-asserted-by":"publisher","unstructured":"Makri, C., Karakasidis, A., Pitoura, E.: Towards a more accurate and fair SVM-based record linkage. In: Tsumoto, S., et al. (eds.) International Conference on Big Data, Osaka, pp. 4691\u20134699. IEEE (2022). https:\/\/doi.org\/10.1109\/BigData55660.2022.10020514","DOI":"10.1109\/BigData55660.2022.10020514"},{"key":"16_CR24","doi-asserted-by":"publisher","unstructured":"Mitchell, M., et al.: Model cards for model reporting. In: Proceedings of the Conference on Fairness, Accountability, and Transparency, FAT 2019, pp. 220\u2013229. Association for Computing Machinery, New York (2019). https:\/\/doi.org\/10.1145\/3287560.3287596","DOI":"10.1145\/3287560.3287596"},{"key":"16_CR25","doi-asserted-by":"publisher","unstructured":"Mudgal, S., et al.: Deep learning for entity matching: a design space exploration. In: Proceedings of the 2018 International Conference on Management of Data, SIGMOD 2018, pp. 19\u201334. Association for Computing Machinery, New York (2018). https:\/\/doi.org\/10.1145\/3183713.3196926","DOI":"10.1145\/3183713.3196926"},{"key":"16_CR26","doi-asserted-by":"crossref","unstructured":"Papadakis, G., Kirielle, N., Christen, P., Palpanas, T.: A critical re-evaluation of benchmark datasets for (deep) learning-based matching algorithms. In: IEEE International Conference on Data Engineering (ICDE), Utrecht (2024)","DOI":"10.1109\/ICDE60146.2024.00265"},{"issue":"1","key":"16_CR27","first-page":"1","volume":"22","author":"J Pineau","year":"2021","unstructured":"Pineau, J., et al.: Improving reproducibility in machine learning research (a report from the neurips 2019 reproducibility program). J. Mach. Learn. Res. 22(1), 1\u201320 (2021)","journal-title":"J. Mach. Learn. Res."},{"key":"16_CR28","doi-asserted-by":"publisher","unstructured":"Primpeli, A., Bizer, C.: Profiling entity matching benchmark tasks. In: Proceedings of the 29th ACM International Conference on Information and Knowledge Management, CIKM 2020, pp. 3101\u20133108. Association for Computing Machinery, New York (2020). https:\/\/doi.org\/10.1145\/3340531.3412781","DOI":"10.1145\/3340531.3412781"},{"issue":"1","key":"16_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0306-4573(96)00043-X","volume":"33","author":"W Shaw","year":"1997","unstructured":"Shaw, W., Burgin, R., Howell, P.: Performance standards and evaluations in IR test collections: cluster-based retrieval models. Inf. Process. Manag. 33(1), 1\u201314 (1997). https:\/\/doi.org\/10.1016\/S0306-4573(96)00043-X","journal-title":"Inf. Process. Manag."}],"container-title":["Lecture Notes in Computer Science","Advances in Knowledge Discovery and Data Mining"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-2242-6_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T23:00:26Z","timestamp":1731798026000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-2242-6_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819722419","9789819722426"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-2242-6_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"25 April 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific-Asia Conference on Knowledge Discovery and Data Mining","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taipei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiwan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 May 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 May 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pakdd2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/pakdd2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}