{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T06:58:22Z","timestamp":1760597902905,"version":"3.38.0"},"reference-count":48,"publisher":"SAGE Publications","issue":"5","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IDA"],"published-print":{"date-parts":[[2019,10,24]]},"DOI":"10.3233\/ida-184238","type":"journal-article","created":{"date-parts":[[2019,10,29]],"date-time":"2019-10-29T18:32:42Z","timestamp":1572373962000},"page":"1145-1166","source":"Crossref","is-referenced-by-count":3,"title":["Distant supervision of relation extraction in sparse data"],"prefix":"10.1177","volume":"23","author":[{"given":"Bijan","family":"Ranjbar-Sahraei","sequence":"first","affiliation":[{"name":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, The Netherlands"}]},{"given":"Hossein","family":"Rahmani","sequence":"additional","affiliation":[{"name":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, The Netherlands"},{"name":"School of Computer Engineering, Iran University of Science and Technology, Teheran, Iran"}]},{"given":"Gerhard","family":"Weiss","sequence":"additional","affiliation":[{"name":"Department of Data Science and Knowledge Engineering, Maastricht University, Maastricht, The Netherlands"}]},{"given":"Karl","family":"Tuyls","sequence":"additional","affiliation":[{"name":"Liverpool University, UK"}]}],"member":"179","reference":[{"key":"10.3233\/IDA-184238_ref1","doi-asserted-by":"crossref","unstructured":"E. Agichtein and V. Ganti, Mining reference tables for automatic text segmentation, in: Proceedings of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, ACM, 2004, pp. 20\u201329.","DOI":"10.1145\/1014052.1014058"},{"key":"10.3233\/IDA-184238_ref2","doi-asserted-by":"crossref","unstructured":"E. Agichtein and L. 
Gravano, Snowball: Extracting relations from large plain-text collections, in: Proceedings of the Fifth ACM Conference on Digital Libraries, ACM, 2000, pp. 85\u201394.","DOI":"10.1145\/336597.336644"},{"key":"10.3233\/IDA-184238_ref5","unstructured":"M. Banko, M.J. Cafarella, S. Soderland, M. Broadhead and O. Etzioni, Open information extraction for the web, in: IJCAI, Vol. 7, 2007, pp. 2670\u20132676."},{"key":"10.3233\/IDA-184238_ref6","unstructured":"S. Bartunov, A. Korshunov, S.-T. Park, W. Ryu and H. Lee, Joint link-attribute user identity resolution in online social networks, in: Proceedings of the 6th International Conference on Knowledge Discovery and Data Mining, Workshop on Social Network Mining and Analysis, ACM, 2012."},{"issue":"1","key":"10.3233\/IDA-184238_ref7","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1145\/1217299.1217304","article-title":"Collective entity resolution in relational data","volume":"1","author":"Bhattacharya","year":"2007","journal-title":"ACM Transactions on Knowledge Discovery from Data (TKDD)"},{"key":"10.3233\/IDA-184238_ref8","doi-asserted-by":"crossref","unstructured":"L. Bilge, T. Strufe, D. Balzarotti and E. Kirda, All your contacts are belong to us: automated identity theft attacks on social networks, in: Proceedings of the 18th International Conference on World Wide Web, ACM, 2009, pp. 551\u2013560.","DOI":"10.1145\/1526709.1526784"},{"issue":"3","key":"10.3233\/IDA-184238_ref9","doi-asserted-by":"crossref","first-page":"153","DOI":"10.3366\/hac.1994.6.3.153","article-title":"Corpus-based name standardization","volume":"6","author":"Bloothooft","year":"1994","journal-title":"History and Computing"},{"key":"10.3233\/IDA-184238_ref11","doi-asserted-by":"crossref","unstructured":"V. Borkar, K. Deshmukh and S. Sarawagi, Automatic segmentation of text into structured records, in: ACM SIGMOD Record, ACM, Vol. 30, 2001, pp. 
175\u2013186.","DOI":"10.1145\/376284.375682"},{"key":"10.3233\/IDA-184238_ref12","doi-asserted-by":"crossref","unstructured":"S. Brin, Extracting patterns and relations from the world wide web, in: The World Wide Web and Databases, Springer, 1999, pp. 172\u2013183.","DOI":"10.1007\/10704656_11"},{"key":"10.3233\/IDA-184238_ref14","doi-asserted-by":"crossref","unstructured":"R.C. Bunescu and R.J. Mooney, A shortest path dependency kernel for relation extraction, in: Proceedings of the Conference on Human Language Technology and Empirical Methods in Natural Language Processing, Association for Computational Linguistics, 2005, pp. 724\u2013731.","DOI":"10.3115\/1220575.1220666"},{"key":"10.3233\/IDA-184238_ref15","unstructured":"K. C., G. M., X. C., Q. W. and Z. A., Entity matching across multiple heterogeneous data sources, in: Database Systems for Advanced Applications. DASFAA 2016. Lecture Notes in Computer Science."},{"key":"10.3233\/IDA-184238_ref16","unstructured":"F.Y. Choi, Advances in domain independent linear text segmentation, in: Proceedings of the 1st North American Chapter of the Association for Computational Linguistics Conference, Association for Computational Linguistics, 2000, pp. 26\u201333."},{"key":"10.3233\/IDA-184238_ref18","unstructured":"P. Christen and R. Gayler, Towards scalable real-time entity resolution using a similarity-aware inverted index approach, in: Proceedings of the 7th Australasian Data Mining Conference-Volume 87, Australian Computer Society, Inc., 2008, pp. 51\u201360."},{"issue":"1","key":"10.3233\/IDA-184238_ref21","doi-asserted-by":"crossref","first-page":"123","DOI":"10.1162\/089120101300346822","article-title":"Unsupervised named entity recognition using syntactic and semantic contextual evidence","volume":"27","author":"Cucchiarelli","year":"2001","journal-title":"Computational Linguistics"},{"key":"10.3233\/IDA-184238_ref22","doi-asserted-by":"crossref","unstructured":"J. Efremova, B. Ranjbar-Sahraei and T. 
Calders, A hybrid disambiguation measure for inaccurate cultural heritage data, in: The 8th Workshop on LaTeCH, 2014, pp. 47\u201355.","DOI":"10.3115\/v1\/W14-0607"},{"key":"10.3233\/IDA-184238_ref23","unstructured":"J. Efremova, B. Ranjbar-Sahraei, F.A. Oliehoek, T. Calders and K. Tuyls, A baseline method for genealogical entity resolution, in: Workshop on Population Reconstruction, 2014."},{"key":"10.3233\/IDA-184238_ref24","doi-asserted-by":"crossref","unstructured":"J. Efremova, B. Ranjbar-Sahraei, H. Rahmani, F. Oliehoek, T. Calders, K. Tuyls and G. Weiss, Multi-source entity resolution for genealogical data, in: Population Reconstruction, Springer International Publishing, 2015, pp. 129\u2013154.","DOI":"10.1007\/978-3-319-19884-2_7"},{"issue":"12","key":"10.3233\/IDA-184238_ref26","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1145\/1409360.1409378","article-title":"Open information extraction from the web","volume":"51","author":"Etzioni","year":"2008","journal-title":"Communications of the ACM"},{"key":"10.3233\/IDA-184238_ref27","doi-asserted-by":"crossref","unstructured":"O. Etzioni, M. Cafarella, D. Downey, S. Kok, A.-M. Popescu, T. Shaked, S. Soderland, D.S. Weld and A. Yates, Web-scale information extraction in knowitall:(preliminary results), in: Proceedings of the 13th international Conference on World Wide Web, ACM, 2004, pp. 100\u2013110.","DOI":"10.1145\/988672.988687"},{"issue":"1","key":"10.3233\/IDA-184238_ref28","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1016\/j.artint.2005.03.001","article-title":"Unsupervised named-entity extraction from the web: An experimental study","volume":"165","author":"Etzioni","year":"2005","journal-title":"Artificial Intelligence"},{"key":"10.3233\/IDA-184238_ref29","doi-asserted-by":"crossref","unstructured":"Z. GuoDong, S. Jian, Z. Jie and Z. 
Min, Exploring various knowledge in relation extraction, in: Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics, Association for Computational Linguistics, 2005, pp. 427\u2013434.","DOI":"10.3115\/1219840.1219893"},{"key":"10.3233\/IDA-184238_ref30","doi-asserted-by":"crossref","unstructured":"N. Kambhatla, Combining lexical, syntactic, and semantic features with maximum entropy models for extracting relations, in: Proceedings of the ACL 2004 on Interactive Poster and Demonstration Sessions, Morristown, NJ, USA, Association for Computational Linguistics, 2004, p. 22.","DOI":"10.3115\/1219044.1219066"},{"issue":"1-2","key":"10.3233\/IDA-184238_ref31","doi-asserted-by":"crossref","first-page":"484","DOI":"10.14778\/1920841.1920904","article-title":"Evaluation of entity resolution approaches on real-world match problems","volume":"3","author":"K\u00f6pcke","year":"2010","journal-title":"Proceedings of the VLDB Endowment"},{"issue":"7","key":"10.3233\/IDA-184238_ref32","doi-asserted-by":"crossref","first-page":"1522","DOI":"10.1109\/TKDE.2017.2684804","article-title":"Profiling entities over time in the presence of unreliable sources","volume":"29","author":"Li","year":"2017","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"9","key":"10.3233\/IDA-184238_ref33","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1145\/1105664.1105679","article-title":"Information extraction: Distilling structured data from unstructured text","volume":"3","author":"McCallum","year":"2005","journal-title":"Queue"},{"key":"10.3233\/IDA-184238_ref34","doi-asserted-by":"crossref","unstructured":"M. Mintz, S. Bills, R. Snow and D. 
Jurafsky, Distant supervision for relation extraction without labeled data, in: Proceedings of the Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP: Volume 2-Volume 2, Association for Computational Linguistics, 2009, pp. 1003\u20131011.","DOI":"10.3115\/1690219.1690287"},{"key":"10.3233\/IDA-184238_ref35","doi-asserted-by":"crossref","unstructured":"M. Motoyama and G. Varghese, I seek you: searching and matching individuals in social networks, in: Proceedings of the Eleventh International Workshop on Web Information and Data Management, ACM, 2009, pp. 67\u201375.","DOI":"10.1145\/1651587.1651604"},{"issue":"1","key":"10.3233\/IDA-184238_ref36","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1075\/li.30.1.03nad","article-title":"A survey of named entity recognition and classification","volume":"30","author":"Nadeau","year":"2007","journal-title":"Lingvisticae Investigationes"},{"key":"10.3233\/IDA-184238_ref38","unstructured":"T.-V.T. Nguyen and A. Moschitti, End-to-end relation extraction using distant supervision from external semantic repositories, in: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies: Short Papers-Volume 2, Association for Computational Linguistics, 2011, pp. 277\u2013282."},{"key":"10.3233\/IDA-184238_ref39","doi-asserted-by":"crossref","unstructured":"H. Purohit, A. Dow, O. Alonso, L. Duan and K. Haas, User taglines: Alternative presentations of expertise and interest in social media, in: Social Informatics (SocialInformatics), 2012 International Conference on, IEEE, 2012, pp. 
236\u2013243.","DOI":"10.1109\/SocialInformatics.2012.68"},{"issue":"1","key":"10.3233\/IDA-184238_ref41","doi-asserted-by":"crossref","first-page":"81","DOI":"10.1007\/BF00116251","article-title":"Induction of decision trees","volume":"1","author":"Quinlan","year":"1986","journal-title":"Machine Learning"},{"key":"10.3233\/IDA-184238_ref43","doi-asserted-by":"crossref","unstructured":"H. Rahmani, B. Ranjbar-Sahraei, G. Weiss and K. Tuyls, Entity resolution in disjoint graphs: an application on genealogical data, Intelligent Data Analysis 20(2) (2016).","DOI":"10.3233\/IDA-160814"},{"issue":"5-6","key":"10.3233\/IDA-184238_ref44","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1080\/713827175","article-title":"Feature selection for the naive bayesian classifier using decision trees","volume":"17","author":"Ratanamahatana","year":"2003","journal-title":"Applied Artificial Intelligence"},{"key":"10.3233\/IDA-184238_ref45","doi-asserted-by":"crossref","unstructured":"L. Ratinov and D. Roth, Design challenges and misconceptions in named entity recognition, in: Proceedings of the Thirteenth Conference on Computational Natural Language Learning, Association for Computational Linguistics, 2009, pp. 147\u2013155.","DOI":"10.3115\/1596374.1596399"},{"key":"10.3233\/IDA-184238_ref46","doi-asserted-by":"crossref","unstructured":"D. Ravichandran and E. Hovy, Learning surface text patterns for a question answering system, in: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, Association for Computational Linguistics, 2002, pp. 41\u201347.","DOI":"10.3115\/1073083.1073092"},{"key":"10.3233\/IDA-184238_ref47","doi-asserted-by":"crossref","unstructured":"S. Riedel, L. Yao and A. McCallum, Modeling relations and their mentions without labeled text, in: Machine Learning and Knowledge Discovery in Databases, Springer, 2010, pp. 
148\u2013163.","DOI":"10.1007\/978-3-642-15939-8_10"},{"issue":"3","key":"10.3233\/IDA-184238_ref48","doi-asserted-by":"crossref","first-page":"261","DOI":"10.1561\/1900000003","article-title":"Information extraction","volume":"1","author":"Sarawagi","year":"2008","journal-title":"Foundations and Trends in Databases"},{"key":"10.3233\/IDA-184238_ref49","unstructured":"M. Schraagen and H.J. Hoogeboom, Predicting record linkage potential in a family reconstruction graph, in: 23th Benelux Conference on Artificial Intelligence (BNAIC2011), 2011, pp. 199\u2013206."},{"issue":"1","key":"10.3233\/IDA-184238_ref50","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1007\/s10032-002-0082-8","article-title":"Fast string correction with levenshtein automata","volume":"5","author":"Schulz","year":"2002","journal-title":"International Journal on Document Analysis and Recognition"},{"key":"10.3233\/IDA-184238_ref52","doi-asserted-by":"crossref","unstructured":"Y. Shinyama and S. Sekine, Preemptive information extraction using unrestricted relation discovery, in: Proceedings of the Main Conference on Human Language Technology Conference of the North American Chapter of the Association of Computational Linguistics, Association for Computational Linguistics, 2006, pp. 304\u2013311.","DOI":"10.3115\/1220835.1220874"},{"key":"10.3233\/IDA-184238_ref54","unstructured":"M. Surdeanu and M. Ciaramita, Robust Information Extraction with Perceptrons, in: Proceedings of the NIST 2007 Automatic Content Extraction Workshop (ACE07), 2007."},{"key":"10.3233\/IDA-184238_ref55","doi-asserted-by":"crossref","unstructured":"E.F. Tjong Kim Sang and F. De Meulder, Introduction to the conll-2003 shared task: Language-independent named entity recognition, in: Proceedings of the Seventh Conference on Natural language learning at HLT-NAACL 2003-Volume 4, Association for Computational Linguistics, 2003, pp. 
142\u2013147.","DOI":"10.3115\/1119176.1119195"},{"key":"10.3233\/IDA-184238_ref56","doi-asserted-by":"crossref","unstructured":"J. Vosecky, D. Hong and V.Y. Shen, User identification across multiple social networks, in: Networked Digital Technologies, 2009. NDT\u201909. First International Conference on, IEEE, 2009, pp. 360\u2013365.","DOI":"10.1109\/NDT.2009.5272173"},{"issue":"5571","key":"10.3233\/IDA-184238_ref57","doi-asserted-by":"crossref","first-page":"1302","DOI":"10.1126\/science.1070120","article-title":"Identity and search in social networks","volume":"296","author":"Watts","year":"2002","journal-title":"Science"},{"key":"10.3233\/IDA-184238_ref58","doi-asserted-by":"crossref","unstructured":"W.E. Winkler, Overview of record linkage and current research directions, in: Bureau of the Census, Citeseer, 2006.","DOI":"10.1002\/9780470057339.var022"},{"key":"10.3233\/IDA-184238_ref59","doi-asserted-by":"crossref","unstructured":"A. Yates, M. Cafarella, M. Banko, O. Etzioni, M. Broadhead and S. Soderland, Textrunner: open information extraction on the web, in: Proceedings of Human Language Technologies: The Annual Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations, Association for Computational Linguistics, 2007, pp. 25\u201326.","DOI":"10.3115\/1614164.1614177"},{"key":"10.3233\/IDA-184238_ref60","first-page":"1083","article-title":"Kernel methods for relation extraction","volume":"3","author":"Zelenko","year":"2003","journal-title":"The Journal of Machine Learning Research"},{"key":"10.3233\/IDA-184238_ref61","doi-asserted-by":"crossref","unstructured":"S. Zhao and R. 
Grishman, Extracting relations with integrated information using kernel methods, in: ACL, The Association for Computer Linguistics, 2005.","DOI":"10.3115\/1219840.1219892"}],"container-title":["Intelligent Data Analysis"],"original-title":[],"link":[{"URL":"https:\/\/content.iospress.com\/download?id=10.3233\/IDA-184238","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T04:37:22Z","timestamp":1741667842000},"score":1,"resource":{"primary":{"URL":"https:\/\/journals.sagepub.com\/doi\/full\/10.3233\/IDA-184238"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10,24]]},"references-count":48,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.3233\/ida-184238","relation":{},"ISSN":["1088-467X","1571-4128"],"issn-type":[{"type":"print","value":"1088-467X"},{"type":"electronic","value":"1571-4128"}],"subject":[],"published":{"date-parts":[[2019,10,24]]}}}