{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T11:14:47Z","timestamp":1725534887095},"publisher-location":"Berlin, Heidelberg","reference-count":27,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642030666"},{"type":"electronic","value":"9783642030673"}],"license":[{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-03067-3_26","type":"book-chapter","created":{"date-parts":[[2009,7,9]],"date-time":"2009-07-09T01:45:02Z","timestamp":1247103902000},"page":"325-337","source":"Crossref","is-referenced-by-count":5,"title":["The Normalized Compression Distance as a Distance Measure in Entity Identification"],"prefix":"10.1007","author":[{"given":"Sebastian","family":"Klenk","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dennis","family":"Thom","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gunther","family":"Heidemann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"26_CR1","unstructured":"Alfonseca, M., Cebri\u00e1n, M., Ortega, A.: Testing genetic algorithm recombination strategies and the normalized compression distance for computer-generated music. In: AIKED 2006: Proceedings of the 5th WSEAS International Conference on Artificial Intelligence, Knowledge Engineering and Data Bases, Stevens Point, Wisconsin, USA, pp. 53\u201358. World Scientific and Engineering Academy and Society (WSEAS) (2006)"},{"key":"26_CR2","unstructured":"Amitay, E., Yogev, S., Yom-Tov, E.: Serial sharers: Detecting split identities of web authors. In: Stein, B., Koppel, M., Stamatatos, E. (eds.) PAN. CEUR Workshop Proceedings, vol.\u00a0276, CEUR-WS.org (2007)"},{"key":"26_CR3","volume-title":"Modern Information Retrieval","author":"R. Baeza-Yates","year":"1999","unstructured":"Baeza-Yates, R., Ribeiro-Neto, B.: Modern Information Retrieval. Addison-Wesley, Reading (1999)"},{"key":"26_CR4","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1145\/956750.956759","volume-title":"KDD 2003: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining","author":"M. Bilenko","year":"2003","unstructured":"Bilenko, M., Mooney, R.J.: Adaptive duplicate detection using learnable string similarity measures. In: KDD 2003: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining, pp. 39\u201348. ACM, New York (2003)"},{"issue":"4","key":"26_CR5","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1109\/JPROC.2008.916370","volume":"96","author":"M.A. Casey","year":"2008","unstructured":"Casey, M.A., Veltkamp, R., Goto, M., Leman, M., Rhodes, C., Slaney, M.: Content-based music information retrieval: Current directions and future challenges. Proceedings of the IEEE\u00a096(4), 668\u2013696 (2008)","journal-title":"Proceedings of the IEEE"},{"issue":"5","key":"26_CR6","doi-asserted-by":"publisher","first-page":"1895","DOI":"10.1109\/TIT.2007.894669","volume":"53","author":"M. Cebrian","year":"2007","unstructured":"Cebrian, M., Alfonseca, M., Ortega, A.: The normalized compression distance is resistant to noise. IEEE Transactions on Information Theory\u00a053(5), 1895\u20131900 (2007)","journal-title":"IEEE Transactions on Information Theory"},{"key":"26_CR7","unstructured":"Christen, P.: A two-step classification approach to unsupervised record linkage. In: AusDM 2007: Proceedings of the sixth Australasian conference on Data mining and analytics, Darlinghurst, Australia, pp. 111\u2013119. Australian Computer Society, Inc. (2007)"},{"key":"26_CR8","series-title":"Studies in Computational Intelligence","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/978-3-540-44918-8_6","volume-title":"Quality Measures in Data Mining","author":"P. Christen","year":"2007","unstructured":"Christen, P., Goiser, K.: Quality and complexity measures for data linkage and deduplication. In: Guillet, F., Hamilton, H.J. (eds.) Quality Measures in Data Mining. Studies in Computational Intelligence, vol.\u00a043, pp. 127\u2013151. Springer, Heidelberg (2007)"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Cilibrasi, R., Vitanyi, P.: Clustering by compression. IEEE Transactions on Information Theory\u00a051(4) (2005)","DOI":"10.1109\/TIT.2005.844059"},{"issue":"1","key":"26_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TKDE.2007.250581","volume":"19","author":"A.K. Elmagarmid","year":"2007","unstructured":"Elmagarmid, A.K., Ipeirotis, P.G., Verykios, V.S.: Duplicate record detection: A survey. IEEE Transactions on Knowledge and Data Engineering\u00a019(1), 1\u201316 (2007)","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"328","key":"26_CR11","doi-asserted-by":"publisher","first-page":"1183","DOI":"10.1080\/01621459.1969.10501049","volume":"64","author":"I.P. Fellegi","year":"1969","unstructured":"Fellegi, I.P., Sunter, A.B.: A theory for record linkage. Journal of the American Statistical Association\u00a064(328), 1183\u20131210 (1969)","journal-title":"Journal of the American Statistical Association"},{"key":"26_CR12","volume-title":"An introduction to probability theory and its applications","author":"W. Feller","year":"1950","unstructured":"Feller, W.: An introduction to probability theory and its applications, vol.\u00a01. Wiley, Chichester (1950)"},{"key":"26_CR13","unstructured":"Goiser, K., Christen, P.: Towards automated record linkage. In: AusDM 2006: Proceedings of the fifth Australasian conference on Data mining and analystics, Darlinghurst, Australia, pp. 23\u201331. Australian Computer Society, Inc. (2006)"},{"key":"26_CR14","volume-title":"Data mining","author":"J. Han","year":"2001","unstructured":"Han, J., Kamber, M.: Data mining. Morgan Kaufmann, San Francisco (2001)"},{"key":"26_CR15","doi-asserted-by":"crossref","unstructured":"Heidemann, G., Ritter, H.: On the Contribution of Compression to Visual Pattern Recognition. In: Proc. 3rd Int\u2019l Conf. on Comp. Vision Theory and Applications, Funchal, Madeira - Portugal, vol.\u00a02, pp. 83\u201389 (2008)","DOI":"10.1109\/ISCCSP.2008.4537468"},{"issue":"1","key":"26_CR16","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1023\/A:1009761603038","volume":"2","author":"M.A. Hern\u00e1ndez","year":"1998","unstructured":"Hern\u00e1ndez, M.A., Stolfo, S.J.: Real-world data is dirty: Data cleansing and the merge\/purge problem. Data Min. Knowl. Discov.\u00a02(1), 9\u201337 (1998)","journal-title":"Data Min. Knowl. Discov."},{"key":"26_CR17","volume-title":"The Data Warehouse ETL Toolkit: Practical Techniques for Extracting, Cleanin","author":"R. Kimball","year":"2004","unstructured":"Kimball, R., Caserta, J.: The Data Warehouse ETL Toolkit: Practical Techniques for Extracting, Cleanin. John Wiley & Sons, Chichester (2004)"},{"key":"26_CR18","unstructured":"Li, M., Chen, X., Li, X., Ma, B., Vitanyi, P.: The similarity metric (2001)"},{"key":"26_CR19","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1145\/347090.347123","volume-title":"KDD 2000: Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery and data mining","author":"A. McCallum","year":"2000","unstructured":"McCallum, A., Nigam, K., Ungar, L.H.: Efficient clustering of high-dimensional data sets with application to reference matching. In: KDD 2000: Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery and data mining, pp. 169\u2013178. ACM Press, New York (2000)"},{"issue":"1","key":"26_CR20","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1145\/375360.375365","volume":"33","author":"G. Navarro","year":"2001","unstructured":"Navarro, G.: A guided tour to approximate string matching. ACM Comput. Surv.\u00a033(1), 31\u201388 (2001)","journal-title":"ACM Comput. Surv."},{"issue":"1","key":"26_CR21","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1145\/322234.322237","volume":"28","author":"M. Rodeh","year":"1981","unstructured":"Rodeh, M., Pratt, V.R., Even, S.: Linear algorithm for data compression via string matching. J. ACM\u00a028(1), 16\u201324 (1981)","journal-title":"J. ACM"},{"key":"26_CR22","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1145\/775047.775087","volume-title":"KDD 2002: Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining","author":"S. Sarawagi","year":"2002","unstructured":"Sarawagi, S., Bhamidipaty, A.: Interactive deduplication using active learning. In: KDD 2002: Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining, pp. 269\u2013278. ACM Press, New York (2002)"},{"key":"26_CR23","unstructured":"Winkler, W.E.: Overview of record linkage and current research directions. Technical Report RRS2006\/02, US Bureau of the Census (2006)"},{"key":"26_CR24","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1145\/1255175.1255213","volume-title":"JCDL 2007: Proceedings of the 7th ACM\/IEEE-CS joint conference on Digital libraries","author":"S. Yan","year":"2007","unstructured":"Yan, S., Lee, D., Kan, M.-Y., Giles, L.C.: Adaptive sorted neighborhood methods for efficient record linkage. In: JCDL 2007: Proceedings of the 7th ACM\/IEEE-CS joint conference on Digital libraries, pp. 185\u2013194. ACM Press, New York (2007)"},{"issue":"1","key":"26_CR25","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1145\/1188913.1188916","volume":"50","author":"H. Zhao","year":"2007","unstructured":"Zhao, H.: Semantic matching across heterogeneous data sources. Commun. ACM\u00a050(1), 45\u201350 (2007)","journal-title":"Commun. ACM"},{"issue":"2","key":"26_CR26","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/j.is.2003.11.001","volume":"30","author":"H. Zhao","year":"2005","unstructured":"Zhao, H., Ram, S.: Entity identification for heterogeneous database integration: a multiple classifier system approach and empirical evaluation. Inf. Syst.\u00a030(2), 119\u2013132 (2005)","journal-title":"Inf. Syst."},{"key":"26_CR27","doi-asserted-by":"crossref","unstructured":"Zhao, H., Ram, S.: Entity matching across heterogeneous data sources: An approach based on constrained cascade generalization. Data & Knowledge Engineering (corrected proof) (in press, 2008) (available online May 4)","DOI":"10.1016\/j.datak.2008.04.007"}],"container-title":["Lecture Notes in Computer Science","Advances in Data Mining. Applications and Theoretical Aspects"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-03067-3_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,8]],"date-time":"2021-10-08T08:28:20Z","timestamp":1633681700000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-03067-3_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642030666","9783642030673"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-03067-3_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2009]]}}}