{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T17:35:32Z","timestamp":1725471332835},"publisher-location":"Berlin, Heidelberg","reference-count":31,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540465355"},{"type":"electronic","value":"9783540465362"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2006]]},"DOI":"10.1007\/11892960_146","type":"book-chapter","created":{"date-parts":[[2006,10,9]],"date-time":"2006-10-09T17:29:42Z","timestamp":1160414982000},"page":"1216-1225","source":"Crossref","is-referenced-by-count":0,"title":["Clustering for Data Matching"],"prefix":"10.1007","author":[{"given":"Edward Tersoo","family":"Apeh","sequence":"first","affiliation":[]},{"given":"Bogdan","family":"Gabrys","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"issue":"1","key":"146_CR1","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/S1088-467X(98)00007-9","volume":"1","author":"A. Famili","year":"1997","unstructured":"Famili, A., Shen, W., Weber, R., Simoudis, E.: Data Preprocessing and Intelligent Data Analysis. Intelligent Data Analysis\u00a01(1), 3\u201323 (1997)","journal-title":"Intelligent Data Analysis"},{"key":"146_CR2","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/356827.356830","volume":"12","author":"P.A. Hall","year":"1980","unstructured":"Hall, P.A., Dowling, G.R.: Approximate string matching. Computer Surveys\u00a0(12), 381\u2013402 (1980)","journal-title":"Computer Surveys"},{"key":"146_CR3","unstructured":"Gill, L.: Methods for Automatic Record Matching and Linking and their use in National Statistics. National Statistics Methodology Series No. 25, London (2001)"},{"issue":"1","key":"146_CR4","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1023\/A:1009761603038","volume":"2","author":"M.A. Hernandez","year":"1998","unstructured":"Hernandez, M.A., Stolfo, S.J.: Real-world data is dirty: Data Cleansing and The Merge\/Purge Problem. Knowledge Discovery\u00a02(1), 9\u201337 (1998)","journal-title":"Knowledge Discovery"},{"issue":"3381","key":"146_CR5","doi-asserted-by":"publisher","first-page":"954","DOI":"10.1126\/science.130.3381.954","volume":"130","author":"H.B. Newcombe","year":"1959","unstructured":"Newcombe, H.B., Kennedy, J.M., Axford, S.J., James, A.P.: Automatic linkage of vital records. Science\u00a0130(3381), 954\u2013959 (1959)","journal-title":"Science"},{"key":"146_CR6","doi-asserted-by":"publisher","first-page":"1183","DOI":"10.2307\/2286061","volume":"64","author":"I.P. Fellegi","year":"1969","unstructured":"Fellegi, I.P., Sunter, A.B.: A theory for record linkage. J. Amer. Statist. Assoc.\u00a064, 1183\u20131210 (1969)","journal-title":"J. Amer. Statist. Assoc."},{"key":"146_CR7","unstructured":"William, E.: Winkler and Yves Thibaudeau, An Application of the Fellegi-Sunter Model of Record Linkage to the, U.S. Census, Number RR91\/09 (1990)"},{"key":"146_CR8","unstructured":"Winkler, W.E.: The State of Record Linkage and Current Research Problems. In: Statistical Society of Canada, Proceedings of the Section on Survey Methods, pp. 73\u201379 (1999)"},{"key":"146_CR9","unstructured":"Kimball, R.: Dealing with Dirty Data,DBMS online, Available at URL (September 1996), http:\/\/www.dbmsmag.com\/9609d14.html"},{"key":"146_CR10","unstructured":"Galhardas, H., Florescu, D., Shasha, D., Simon, E., Saita, C.-A.: Declarative Data Cleaning: Language, Model, and Algorithms. In: 27th International Conference on Very Large Data Bases, pp. 371\u2013380 (2001)"},{"issue":"8","key":"146_CR11","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1016\/S0306-4379(01)00041-2","volume":"26","author":"W.L. Low","year":"2001","unstructured":"Low, W.L., Lee, M.-L., Lin, T.W.: A knowledge-based approach for duplicate elimination in data cleaning. Inf. Syst.\u00a026(8), 585\u2013606 (2001)","journal-title":"Inf. Syst."},{"key":"146_CR12","unstructured":"Monge, A.E., Elkan, C.P.: An efficient domain-independent algorithm for detecting approximately duplicate database records. In: Proceedings of the ACM-SIGMOD workshop on Research issues on Knowledge discovery and data mining. AZ (1997)"},{"key":"146_CR13","doi-asserted-by":"crossref","unstructured":"McCallum, A.K., Nigam, K., Ungar, L.H.: Efficient clustering of high dimensional datasets with application to reference matching. In: Sixth International Conference on Knowledge Discovery and Data Mining, Boston (2000)","DOI":"10.1145\/347090.347123"},{"key":"146_CR14","doi-asserted-by":"crossref","unstructured":"Sauleau, E.A., Paumier, J.-P., Buemi, A.: Medical record linkage in health information systems byapproximate string matching and clustering. In: BMC Medical Informatics and Decision Making, pp. 5\u201332 (2005)","DOI":"10.1186\/1472-6947-5-32"},{"key":"146_CR15","volume-title":"Algorithms for Clustering Data","author":"A.K. Jain","year":"1988","unstructured":"Jain, A.K., Dubes, R.C.: Algorithms for Clustering Data. Prentice-Hall, Englewood Cliffs (1988)"},{"key":"146_CR16","unstructured":"Information retrieval: data structures and algorithmspp, pp. 419 - 442 (publication, 1992)"},{"key":"146_CR17","doi-asserted-by":"crossref","unstructured":"Zobel, J., Dart, P.: Phonetic string matching: Lessons from information retrieval. In: Proceedings of the Eighteenth ACM SIGIR International Conference on Research and Development in Information Retrieval, Zurich, Switzerland, pp. 166\u2013173 (August 1996)","DOI":"10.1145\/243199.243258"},{"key":"146_CR18","doi-asserted-by":"crossref","first-page":"373","DOI":"10.1093\/comjnl\/9.4.373","volume":"9","author":"G. Lance","year":"1967","unstructured":"Lance, G., Williams, W.: A general theory of classification sorting strategies. Computer Journal\u00a09, 373\u2013386 (1967)","journal-title":"Computer Journal"},{"key":"146_CR19","volume-title":"Teach\/Me Data Analysis","author":"H. Lohninger","year":"1999","unstructured":"Lohninger, H.: Teach\/Me Data Analysis. Springer, Berlin (1999)"},{"key":"146_CR20","volume-title":"Dunham: Data Mining: Introductory and Advanced Topics","author":"H. Margaret","year":"2002","unstructured":"Margaret, H.: Dunham: Data Mining: Introductory and Advanced Topics. Prentice-Hall, Englewood Cliffs (2002)"},{"issue":"1","key":"146_CR21","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1108\/EUM0000000007161","volume":"54","author":"A.M. Robertson","year":"1998","unstructured":"Robertson, A.M., Willet, P.: Applications of n-grams in textual information systems. Journal of Documentation\u00a054(1), 48\u201369 (1998)","journal-title":"Journal of Documentation"},{"key":"146_CR22","unstructured":"Van-Rijsbergen, C.J.: Information Retrieval, ch. 3, 2nd edn., Butterworths, London, England (1979)"},{"key":"146_CR23","volume-title":"Introduction to Modern Information Retrieval","author":"G. Salton","year":"1983","unstructured":"Salton, G., McGill, M.J.: Introduction to Modern Information Retrieval. McGraw-Hill, New York (1983)"},{"issue":"4","key":"146_CR24","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1016\/0306-4573(88)90027-1","volume":"24","author":"P. Willett","year":"1988","unstructured":"Willett, P.: Recent trends in hierarchic document clustering: A critical review. Information Processing & Management\u00a024(4), 577\u2013597 (1988)","journal-title":"Information Processing & Management"},{"key":"146_CR25","doi-asserted-by":"crossref","unstructured":"Smadja, F.A., McKeown, K.R.: Translating collocations for use in bilingual lexicons. In: Proceedings of the ARPA Human Language Technology Workshop, Princeton, N.J. (1994)","DOI":"10.3115\/1075812.1075843"},{"key":"146_CR26","unstructured":"Teknomo, K.: Similarity Measurement, http:\/\/people.revoledu.com\/kardi\/tutorial\/Similarity\/"},{"key":"146_CR27","unstructured":"Sparck Jones, K.: Automatic keyword classification for information retrieval, Butterworths, London, UK (1971)"},{"key":"146_CR28","volume-title":"Proc. of the 18th Int. Conf. on Data Engineering","author":"M. Elfeky","year":"2002","unstructured":"Elfeky, M., Verykios, V., Elmagarmid, A.: TAILOR: A Record Linkage Toolbox. In: Proc. of the 18th Int. Conf. on Data Engineering, IEEE, Los Alamitos (2002)"},{"key":"146_CR29","doi-asserted-by":"crossref","unstructured":"Eisen, M.B., Spellman, P.T., Browndagger, P.O., Botstein, D.: Cluster analysis and display of genome-wide expression patterns. In: Proceedings of the National Academy of Sciences of the United States of America (PNAS), vol.\u00a095, p. 25 (1998)","DOI":"10.1073\/pnas.95.25.14863"},{"key":"146_CR30","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1093\/bioinformatics\/17.suppl_1.S22","volume":"17","author":"Z. Bar-Joseph","year":"2001","unstructured":"Bar-Joseph, Z., Giord, D.K., Jaakkola, T.S.: Fast optimal leaf ordering for hierarchical clustering. Bioinformatics\u00a017, 22\u201329 (2001)","journal-title":"Bioinformatics"},{"key":"146_CR31","volume-title":"Managing Gigabytes","author":"I. Witten","year":"1999","unstructured":"Witten, I., Moffat, A., Bell, T.: Managing Gigabytes, 2nd edn. Morgan Kaufmann Publishers, New York (1999)","edition":"2"}],"container-title":["Lecture Notes in Computer Science","Knowledge-Based Intelligent Information and Engineering Systems"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/11892960_146.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,17]],"date-time":"2020-11-17T19:52:58Z","timestamp":1605642778000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/11892960_146"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006]]},"ISBN":["9783540465355","9783540465362"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/11892960_146","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2006]]}}}