{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T04:13:34Z","timestamp":1741666414371,"version":"3.38.0"},"reference-count":62,"publisher":"SAGE Publications","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IDA"],"published-print":{"date-parts":[[2016,3,1]]},"DOI":"10.3233\/ida-160814","type":"journal-article","created":{"date-parts":[[2016,3,8]],"date-time":"2016-03-08T15:50:34Z","timestamp":1457452234000},"page":"455-475","source":"Crossref","is-referenced-by-count":8,"title":["Entity resolution in disjoint graphs: An application on genealogical data"],"prefix":"10.1177","volume":"20","author":[{"given":"Hossein","family":"Rahmani","sequence":"first","affiliation":[{"name":"School of Computer Engineering, Iran University of Science and Technology, Tehran, Iran"},{"name":"Maastricht University, Maastricht, MD, The Netherlands"}]},{"given":"Bijan","family":"Ranjbar-Sahraei","sequence":"additional","affiliation":[{"name":"Maastricht University, Maastricht, MD, The Netherlands"}]},{"given":"Gerhard","family":"Weiss","sequence":"additional","affiliation":[{"name":"Maastricht University, Maastricht, MD, The Netherlands"}]},{"given":"Karl","family":"Tuyls","sequence":"additional","affiliation":[{"name":"University of Liverpool, Liverpool, UK"}]}],"member":"179","reference":[{"key":"10.3233\/IDA-160814_ref1","doi-asserted-by":"crossref","first-page":"954","DOI":"10.1126\/science.130.3381.954","article-title":"Automatic linkage of vital records","volume":"130","author":"Newcombe","year":"1959","journal-title":"Science"},{"key":"10.3233\/IDA-160814_ref2","doi-asserted-by":"crossref","first-page":"1183","DOI":"10.1080\/01621459.1969.10501049","article-title":"A theory for record linkage","volume":"64","author":"Fellegi","year":"1969","journal-title":"Journal of the American Statistical Association"},{"key":"10.3233\/IDA-160814_ref4","unstructured":"Bhattacharya I. and Getoor L., Iterative record linkage for cleaning and integration, in: Proceedings of the 9th ACM SIGMOD Workshop on Research Issues in Data Mining and Knowledge Discovery, DMKD '04, ACM, New York, NY, USA, (2004), 11-18. URL: http:\/\/doi.acm.org\/10.1145\/1008694.1008697. doi: 101145\/1008694.1008697."},{"key":"10.3233\/IDA-160814_ref5","unstructured":"Ravikumar P. and Cohen W.W., A hierarchical graphical model for record linkage, in: Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence, UAI '04, AUAI Press, Arlington, Virginia, United States, (2004), 454-461. http: \/\/dlacm.org\/citation.cfm?id=1036843.1036898."},{"key":"10.3233\/IDA-160814_ref7","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1145\/568271.223807","article-title":"The merge\/purge problem for large databases","volume":"24","author":"Hern\u00e1ndez","year":"1995","journal-title":"SIGMOD Rec"},{"key":"10.3233\/IDA-160814_ref8","unstructured":"Monge A.E. and Elkan C., An efficient domain-independent algorithm for detecting approximately duplicate database records, in: DMKD, (1997). URL: http:\/\/dblp.uni-trier.de\/db\/conf\/dmkd\/dmkd97.html#MongeE97."},{"key":"10.3233\/IDA-160814_ref9","unstructured":"Sarawagi S. and Bhamidipaty A., Interactive deduplication using active learning, in: Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '02, ACM, New York, NY, USA, (2002), 269-278. URL: http:\/\/doi.acm.org\/10.1145\/775047.775087. doi: 101145\/775047.775087."},{"key":"10.3233\/IDA-160814_ref10","unstructured":"Ananthakrishna R., Chaudhuri S. and Ganti V., Eliminating fuzzy duplicates in data warehouses, in: Proceedings of the 28th International Conference on Very Large Data Bases, VLDB '02, VLDB Endowment, (2002), 586-597. URL: http:\/\/ dlacm.org\/citation.cfm?id=1287369.1287420."},{"key":"10.3233\/IDA-160814_ref11","unstructured":"Cohen W.W., Kautz H.A. and McAllester D.A., Hardening soft information sources, in: KDD, R. Ramakrishnan, S.J. Stolfo, R.J. Bayardo and I. Parsa, eds, ACM, (2000), 255-259. URL: http:\/\/dblp.uni-trier.de\/db\/conf\/kdd\/kdd2000.html# CohenKM00."},{"key":"10.3233\/IDA-160814_ref12","doi-asserted-by":"crossref","unstructured":"McCallum A., Nigam K. and Ungar L.H., Efficient clustering of high-dimensional data sets with application to reference matching, in: Proceedings of the Sixth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, ACM, (2000), 169-178.","DOI":"10.1145\/347090.347123"},{"key":"10.3233\/IDA-160814_ref13","first-page":"572","volume-title":"In ICDM","author":"Singla","year":"2006"},{"key":"10.3233\/IDA-160814_ref14","doi-asserted-by":"crossref","first-page":"255","DOI":"10.1007\/s00778-008-0098-x","article-title":"Swoosh: A generic approach to entity resolution","volume":"18","author":"Benjelloun","year":"2009","journal-title":"VLDB J"},{"key":"10.3233\/IDA-160814_ref15","doi-asserted-by":"crossref","unstructured":"Getoor L. and Machanavajjhala A., Entity resolution for big data, in: Proceedings of the 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, ACM, (2013), 1527-1527.","DOI":"10.1145\/2487575.2506179"},{"key":"10.3233\/IDA-160814_ref16","unstructured":"Efremova J., Ranjbar-Sahraei B., Oliehoek F.A., Calders T. and Tuyls K., A baseline method for genealogical entity resolution, in: Workshop on Population Reconstruction, (2014)."},{"volume-title":"Mining Graph Data","year":"2006","author":"Bhattacharya","key":"10.3233\/IDA-160814_ref17"},{"key":"10.3233\/IDA-160814_ref18","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1007\/s13222-012-0110-x","article-title":"Parallel entity resolution with dedoop","volume":"13","author":"Kolb","year":"2013","journal-title":"Datenbank-Spektrum"},{"key":"10.3233\/IDA-160814_ref20","first-page":"1326","article-title":"Entity resolution with evolving rules","volume":"3","author":"Whang","year":"2010","journal-title":"PVLDB"},{"key":"10.3233\/IDA-160814_ref22","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1145\/375360.375365","article-title":"A guided tour to approximate string matching","volume":"33","author":"Navarro","year":"2001","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"10.3233\/IDA-160814_ref23","unstructured":"Efremova J., Ranjbar-Sahraei B. and Calders T., A hybrid disambiguation measure for inaccurate cultural heritage data, in: The 8th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities, (2014)."},{"key":"10.3233\/IDA-160814_ref24","unstructured":"Bogdanov P. and Singh A.K., Function prediction using neighborhood patterns, in: BioKDD Workshop, (2008)."},{"key":"10.3233\/IDA-160814_ref25","unstructured":"He J., Li M., Zhang H.-J., Tong H. and Zhang C., Manifold-ranking based image retrieval, in: Proceedings of the 12th Annual ACM International Conference on Multimedia, MULTIMEDIA '04, ACM, New York, NY, USA, (2004), 9-16. URL: http:\/\/doi.acm.org\/10.1145\/1027527.1027531. doi: 101145\/1027527.1027531."},{"key":"10.3233\/IDA-160814_ref26","unstructured":"Jeh G. and Widom J., Simrank: A measure of structural-context similarity, in: Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '02, ACM, New York, NY, USA, (2002), 538-543. URL: http:\/\/doi.acm.org\/10.1145\/775047.775126. doi: 101145\/775047.775126."},{"key":"10.3233\/IDA-160814_ref27","unstructured":"Page L., Brin S., Motwani R. and Winograd T., The pagerank citation ranking: Bringing order to the web, in: Proceedings of the 7th International World Wide Web Conference, Brisbane, Australia, (1998), 161-172. URL: citeseer.nj.nec.com\/ page98pagerankhtml."},{"key":"10.3233\/IDA-160814_ref28","unstructured":"Monge A.E. and Elkan C., An efficient domain-independent algorithm for detecting approximately duplicate database records, in: DMKD, (1997)."},{"key":"10.3233\/IDA-160814_ref29","first-page":"355","article-title":"Matching and record linkage","volume":"1","author":"Winkler","year":"1995","journal-title":"Business Survey Methods"},{"key":"10.3233\/IDA-160814_ref30","unstructured":"Monge A. and Elkan C., The field matching problem: Algorithms and applications, in: In Proceedings of the Second International Conference on Knowledge Discovery and Data Mining, (1996), 267-270."},{"key":"10.3233\/IDA-160814_ref31","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1145\/375360.375365","article-title":"A guided tour to approximate string matching","volume":"33","author":"Navarro","year":"2001","journal-title":"ACM Comput Surv"},{"volume-title":"Proceedings of IJCAI-03 Workshop on Information Integration","author":"Cohen","key":"10.3233\/IDA-160814_ref32"},{"key":"10.3233\/IDA-160814_ref33","unstructured":"Chaudhuri S., Ganjam K., Ganti V. and Motwani R., Robust and efficient fuzzy match for online data cleaning, in: Proceedings of the 2003 ACM SIGMOD International Conference on Management of Data, SIGMOD '03, ACM, New York, NY, USA, (2003), 313-324. URL: http:\/\/doi.acm.org\/10.1145\/872757.872796. doi: 101145\/872757.872796."},{"key":"10.3233\/IDA-160814_ref34","doi-asserted-by":"crossref","first-page":"522","DOI":"10.1109\/34.682181","article-title":"Learning string edit distance","volume":"20","author":"Ristad","year":"1998","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.3233\/IDA-160814_ref35","unstructured":"Bilenko M. and Mooney R.J., Adaptive duplicate detection using learnable string similarity measures, in: Proceedings of the Ninth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '03, ACM, New York, NY, USA, (2003), 39-48. URL: http:\/\/doi.acm.org\/10.1145\/956750.956759. doi: 101145\/956750.956759."},{"key":"10.3233\/IDA-160814_ref36","doi-asserted-by":"crossref","unstructured":"Cohen W.W. and Richman J., Learning to match and cluster large high-dimensional data sets for data integration, in: Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '02, ACM, New York, NY, USA, (2002), 475-480. URL: http:\/\/doi.acm.org\/10.1145\/775047.775116. doi: 10.1145\/775047. 775116.","DOI":"10.1145\/775047"},{"key":"10.3233\/IDA-160814_ref37","doi-asserted-by":"crossref","first-page":"2001","DOI":"10.1016\/S0306-4379(01)00042-4","article-title":"Learning object identification rules for information integration","volume":"26","author":"Tejada","year":"2001","journal-title":"Information Systems"},{"key":"10.3233\/IDA-160814_ref38","doi-asserted-by":"crossref","unstructured":"Sarawagi S. and Bhamidipaty A., Interactive deduplication using active learning, in: The Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD-2002), (2002), 269-278.","DOI":"10.1145\/775085.775087"},{"key":"10.3233\/IDA-160814_ref39","unstructured":"Koudas N., Marathe A. and Srivastava D., Flexible string matching against large databases in practice, in: Proceedings of the Thirtieth International Conference on Very Large Data Bases - Volume 30, VLDB '04, VLDB Endowment, (2004), 1078-1086. URL: http:\/\/dlacm.org\/citation.cfm?id=1316689.1316782."},{"key":"10.3233\/IDA-160814_ref41","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1109\/MIS.2003.1234765","article-title":"Adaptive name matching in information integration","volume":"18","author":"Bilenko","year":"2003","journal-title":"IEEE Intelligent Systems"},{"key":"10.3233\/IDA-160814_ref42","first-page":"31","article-title":"Multi-relational record linkage","author":"Singla","year":"2004","journal-title":"KDD-2004 Workshop on Multi-Relational Data Mining"},{"key":"10.3233\/IDA-160814_ref43","unstructured":"Dong X., Halevy A. and Madhavan J., Reference reconciliation in complex information spaces, in: Proceedings of the 2005 ACM SIGMOD International Conference on Management of Data, SIGMOD '05, ACM, New York, NY, USA, (2005), 85-96. URL: http:\/\/doi.acm.org\/10.1145\/1066157.1066168. doi: 101145\/1066157.1066168."},{"key":"10.3233\/IDA-160814_ref44","doi-asserted-by":"crossref","first-page":"716","DOI":"10.1145\/1138394.1138401","article-title":"Domain-independent data cleaning via analysis of entity-relationship graph","volume":"31","author":"Kalashnikov","year":"2006","journal-title":"ACM Trans Database Syst"},{"key":"10.3233\/IDA-160814_ref45","unstructured":"Doan A., Lu Y., Lee Y. and Han J., Object matching for information integration: A profiler-based approach, in: Proceedings of the IJCAI-2003 Workshop on Information Integration on the Web, Acapulco, Mexico, (2003), 53-58."},{"key":"10.3233\/IDA-160814_ref46","unstructured":"Neville J., Adler M. and Jensen D., Clustering relational data using attribute and link information, in: In Proceedings of the Text Mining and Link Analysis Workshop, 18th International Joint Conference on Artificial Intelligence, (2003), 9-15."},{"volume-title":"The Relational Model for Database Management: Version 2","year":"1990","author":"Codd","key":"10.3233\/IDA-160814_ref47"},{"volume-title":"Fundamentals of Database Systems","year":"1999","author":"Elmasri","key":"10.3233\/IDA-160814_ref48"},{"key":"10.3233\/IDA-160814_ref49","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1146\/annurev.soc.27.1.415","article-title":"Birds of a feather: Homophily in social networks","volume":"27","author":"McPherson","year":"2001","journal-title":"Annual Review of Sociology"},{"key":"10.3233\/IDA-160814_ref50","unstructured":"McCallum A., Nigam K. and Ungar L.H., Efficient clustering of high-dimensional data sets with application to reference matching, in: Proceedings of the Sixth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '00, ACM, New York, NY, USA, (2000), 169-178. URL: http:\/\/doi.acm.org\/10.1145\/347090.347123. doi: 101145\/347090.347123."},{"key":"10.3233\/IDA-160814_ref51","first-page":"25","article-title":"A comparison of fast blocking methods for record linkage","author":"Baxter","year":"2003","journal-title":"ACM SIGKDD '03 Workshop on Data Cleaning, Record Linkage, and Object Consolidation"},{"key":"10.3233\/IDA-160814_ref52","first-page":"87","article-title":"Adaptive blocking: Learning to scale up record linkage","author":"Bilenko","year":"2006","journal-title":"ICDM"},{"key":"10.3233\/IDA-160814_ref53","first-page":"167","article-title":"Adaptive and flexible blocking for record linkage tasks","volume":"1","author":"Evangelista","year":"2010","journal-title":"JIDM"},{"key":"10.3233\/IDA-160814_ref55","unstructured":"Philips L., Hanging on the metaphone, Computer Language 7 (1990)."},{"key":"10.3233\/IDA-160814_ref56","first-page":"38","article-title":"The double metaphone search algorithm","volume":"18","author":"Philips","year":"2000","journal-title":"C\/C++ Users Journal"},{"key":"10.3233\/IDA-160814_ref57","unstructured":"Rahmani H., Ranjbar-Sahraei B., Weiss G. and Tuyls K., Contextual entity resolution approach for genealogical data, in: Workshop on Knowledge Discovery, Data Mining and Machine Learning, (2014)."},{"key":"10.3233\/IDA-160814_ref59","doi-asserted-by":"crossref","unstructured":"Can T., \u00c7amo\u011flu O. and Singh A.K., Analysis of protein-protein interaction networks using random walks, in: Proceedings of the 5th International Workshop on Bioinformatics, ACM, (2005), 61-68.","DOI":"10.1145\/1134030.1134042"},{"key":"10.3233\/IDA-160814_ref60","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1145\/1217299.1217304","article-title":"Collective entity resolution in relational data","volume":"1","author":"Bhattacharya","year":"2007","journal-title":"ACM Transactions on Knowledge Discovery From Data (TKDD)"},{"volume-title":"Introduction to the Practice of Statistics: Extended","year":"2009","author":"Moore","key":"10.3233\/IDA-160814_ref61"},{"key":"10.3233\/IDA-160814_ref62","doi-asserted-by":"crossref","first-page":"607","DOI":"10.1177\/001316447003000308","article-title":"Determining sample size for research activities","volume":"30","author":"Krejcie","year":"1970","journal-title":"Educational and Psychological Measurement"},{"key":"10.3233\/IDA-160814_ref63","unstructured":"Efremova J., Ranjbar-Sahraei B., Oliehoek F.A., Calders T. and Tuyls K., An interactive, web-based tool for genealogical entity resolution, in: 25th Benelux Conference on Artificial Intelligence, (2013), 376-377."},{"key":"10.3233\/IDA-160814_ref64","first-page":"35","volume-title":"Prosopography Approaches and Applications A Handbook","author":"Koenraad","year":"2007"},{"key":"10.3233\/IDA-160814_ref65","doi-asserted-by":"crossref","DOI":"10.1002\/9781118617151","volume-title":"Biological Knowledge Discovery Handbook: Preprocessing, Mining and Postprocessing of Biological Data","author":"Elloumi","year":"2013"},{"key":"10.3233\/IDA-160814_ref66","doi-asserted-by":"crossref","first-page":"957","DOI":"10.1016\/j.cell.2005.08.029","article-title":"A human protein-protein interaction network: A resource for annotating the proteome","volume":"122","author":"Stelzl","year":"2005","journal-title":"Cell"},{"key":"10.3233\/IDA-160814_ref67","first-page":"257","article-title":"Uncovering biological network function via graphlet degree signatures","volume":"6","author":"Milenkovi","year":"2008","journal-title":"Cancer Informatics"},{"key":"10.3233\/IDA-160814_ref68","doi-asserted-by":"crossref","first-page":"36+","DOI":"10.1186\/1752-0509-3-36","article-title":"Identifying disease-specific genes based on their topological significance in protein networks","volume":"3","author":"Dezso","year":"2009","journal-title":"BMC Systems Biology"},{"key":"10.3233\/IDA-160814_ref69","doi-asserted-by":"crossref","first-page":"709","DOI":"10.1016\/j.drudis.2007.07.011","article-title":"Human protein-protein interaction networks and the value for drug discovery","volume":"12","author":"Ruffner","year":"2007","journal-title":"Drug Discovery Today"}],"container-title":["Intelligent Data Analysis"],"original-title":[],"link":[{"URL":"https:\/\/content.iospress.com\/download?id=10.3233\/IDA-160814","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T13:15:02Z","timestamp":1741612502000},"score":1,"resource":{"primary":{"URL":"https:\/\/journals.sagepub.com\/doi\/full\/10.3233\/IDA-160814"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,3,1]]},"references-count":62,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.3233\/ida-160814","relation":{},"ISSN":["1088-467X","1571-4128"],"issn-type":[{"type":"print","value":"1088-467X"},{"type":"electronic","value":"1571-4128"}],"subject":[],"published":{"date-parts":[[2016,3,1]]}}}