{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T12:43:47Z","timestamp":1765889027915,"version":"3.37.3"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,1,11]],"date-time":"2023-01-11T00:00:00Z","timestamp":1673395200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,11]],"date-time":"2023-01-11T00:00:00Z","timestamp":1673395200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001665","name":"Agence Nationale de la Recherche","doi-asserted-by":"publisher","award":["ANR-17-CE23-0018"],"award-info":[{"award-number":["ANR-17-CE23-0018"]}],"id":[{"id":"10.13039\/501100001665","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s10994-022-06277-7","type":"journal-article","created":{"date-parts":[[2023,1,11]],"date-time":"2023-01-11T21:02:36Z","timestamp":1673470956000},"page":"687-720","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Relational data embeddings for feature enrichment with background information"],"prefix":"10.1007","volume":"112","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2643-1848","authenticated-orcid":false,"given":"Alexis","family":"Cvetkov-Iliev","sequence":"first","affiliation":[]},{"given":"Alexandre","family":"Allauzen","sequence":"additional","affiliation":[]},{"given":"Ga\u00ebl","family":"Varoquaux","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,1,11]]},"reference":[{"key":"6277_CR1","unstructured":"Ali, M., Berrendorf, M., Hoyt, C. T., Vermue, L., Galkin, M., Sharifzadeh, S., Fischer, A., Tresp, V., & Lehmann, J. (2020). Bringing light into the dark: A large-scale evaluation of knowledge graph embedding models under a unified framework. arXiv preprintarXiv:2006.13365."},{"key":"6277_CR2","unstructured":"Ali, M., Berrendorf, M., Hoyt, C. T., Vermue, L., Sharifzadeh, S., Tresp, V., & Lehmann, J. (2021). Pykeen 1.0: A python library for training and evaluating knowledge graph embeddings. Journal of Machine Learning Research, 22(82):1\u20136."},{"key":"6277_CR3","unstructured":"Arora, S., & Bedathur, S. (2020). On embeddings in relational databases. arXiv:2005.06437."},{"key":"6277_CR4","first-page":"4463","volume":"32","author":"I Balazevic","year":"2019","unstructured":"Balazevic, I., Allen, C., & Hospedales, T. (2019). Multi-relational poincar\u00e9 graph embeddings. Neural Information Processing Systems, 32, 4463.","journal-title":"Neural Information Processing Systems"},{"key":"6277_CR5","unstructured":"Bauer, F., & Kaltenb\u00f6ck, M. (2011). Linked open data: The essentials (Vol. 710). Edition mono\/monochrom, Vienna."},{"key":"6277_CR6","doi-asserted-by":"crossref","unstructured":"Bojanowski, P., Grave, E., Joulin, A., & Mikolov, T. (2016). Enriching word vectors with subword information. arXiv:1607.04606.","DOI":"10.1162\/tacl_a_00051"},{"key":"6277_CR7","doi-asserted-by":"crossref","unstructured":"Bordawekar, R., & Shmueli, O. (2017). Using word embedding to enable semantic queries in relational databases. In Proceedings of the 1st workshop on data management for end-to-end machine learning. DEEM.","DOI":"10.1145\/3076246.3076251"},{"key":"6277_CR8","unstructured":"Bordes, A., Usunier, N., Garcia-Dur\u00e1n, A., Weston, J., & Yakhnenko, O. (2013). Translating embeddings for modeling multi-relational data. In Neural information processing systems (p. 2787)."},{"key":"6277_CR9","doi-asserted-by":"crossref","unstructured":"Cappuzzo, R., Papotti, P., & Thirumuruganathan, S. (2020). Creating embeddings of heterogeneous relational datasets for data integration tasks. In SIGMOD (p. 1335).","DOI":"10.1145\/3318464.3389742"},{"key":"6277_CR10","doi-asserted-by":"crossref","unstructured":"Chen, L., Varoquaux, G., & Suchanek, F. (2022). Imputing out-of-vocabulary embeddings with love makes language models robust with little cost. In ACL 2022-60th annual meeting of the association for computational linguistics.","DOI":"10.18653\/v1\/2022.acl-long.245"},{"key":"6277_CR11","doi-asserted-by":"crossref","unstructured":"Cochez, M., Ristoski, P., Ponzetto, S.\u00a0P., & Paulheim, H. (2017). Global rdf vector space embeddings. In International semantic web conference (pp. 190\u2013207). Springer.","DOI":"10.1007\/978-3-319-68288-4_12"},{"key":"6277_CR12","unstructured":"CrowdFlower. (2016). Data science report. Retrieved from https:\/\/visit.figure-eight.com\/rs\/416-ZBE-142\/images\/CrowdFlower_DataScienceReport_2016.pdf."},{"key":"6277_CR13","doi-asserted-by":"crossref","unstructured":"d\u2019Amato, C., Quatraro, N.\u00a0F., & Fanizzi, N. (2021). Injecting background knowledge into embedding models for predictive tasks on knowledge graphs. In 18th extended semantic web conference\u2014research track.","DOI":"10.1007\/978-3-030-77385-4_26"},{"key":"6277_CR14","unstructured":"DBPedia web page. Retrieved November 18, 2021, from https:\/\/www.dbpedia.org\/resources\/latest-core"},{"key":"6277_CR15","doi-asserted-by":"crossref","unstructured":"Egami, S., Nishimura, S., & Fukuda, K. (2021). A framework for constructing and augmenting knowledge graphs using virtual space: Towards analysis of daily activities. In 2021 IEEE 33rd international conference on tools with artificial intelligence (ICTAI) (pp. 1226\u20131230).","DOI":"10.1109\/ICTAI52525.2021.00194"},{"issue":"4","key":"6277_CR16","doi-asserted-by":"publisher","first-page":"617","DOI":"10.3233\/SW-200404","volume":"12","author":"GA Gesese","year":"2021","unstructured":"Gesese, G. A., Biswas, R., Alam, M., & Sack, H. (2021). A survey on knowledge graph embeddings with literals: Which model links better literal-ly? Semantic Web, 12(4) 617\u2013647. https:\/\/doi.org\/10.3233\/SW-200404","journal-title":"Semantic Web"},{"key":"6277_CR17","doi-asserted-by":"crossref","unstructured":"Grohe, M. (2020). Word2vec, node2vec, graph2vec, x2vec: Towards a theory of vector embeddings of structured data. In Proceedings of the 39th ACM SIGMOD-SIGACT-SIGAI symposium on principles of database systems, PODS\u201920.","DOI":"10.1145\/3375395.3387641"},{"key":"6277_CR18","unstructured":"Kaggle Machine Learning & Data Science Survey (2017). https:\/\/www.kaggle.com\/ash316\/novice-to-grandmaster."},{"key":"6277_CR19","doi-asserted-by":"crossref","unstructured":"Kanter, J.\u00a0M., & Veeramachaneni, K. (2015). Deep feature synthesis: Towards automating data science endeavors. In IEEE international conference on data science and advanced analytics (DSAA) (pp. 1\u201310).","DOI":"10.1109\/DSAA.2015.7344858"},{"key":"6277_CR20","doi-asserted-by":"crossref","unstructured":"Kramer, S., Lavra\u010d, N., & Flach, P. (2001). Propositionalization approaches to relational data mining, (pp. 262\u2013286). Springer.","DOI":"10.1007\/978-3-662-04599-2_11"},{"key":"6277_CR21","doi-asserted-by":"crossref","unstructured":"Kristiadi, A., Khan, M. A., Lukovnikov, D., Lehmann, J., & and Fischer, A. (2019). Incorporating literals into knowledge graph embeddings. In International Semantic Web Conference (pp. 347\u2013363). Springer, Cham.","DOI":"10.1007\/978-3-030-30793-6_20"},{"key":"6277_CR22","doi-asserted-by":"crossref","unstructured":"Lam, H. T., Buesser, B., Min, H., Minh, T. N., Wistuba, M., Khurana, U., Bramble, G., Salonidis, T., Wang, D., & Samulowitz, H. (2021). Automated data science for relational data. In International Conference on Data Engineering (ICDE) (p. 2689). IEEE.","DOI":"10.1109\/ICDE51399.2021.00305"},{"key":"6277_CR23","unstructured":"Lam, H. T., Minh, T. N., Sinn, M., Buesser, B., & Wistuba, M. (2019). Neural feature learning from relational database. arXiv:1801.05372."},{"key":"6277_CR24","unstructured":"Lam, H. T., Thiebaut, J. M., Sinn, M., Chen, B., Mai, T., & Alkan, O. (2017). One button machine for automating feature engineering in relational databases. arXiv:1706.00327."},{"issue":"7","key":"6277_CR25","doi-asserted-by":"publisher","first-page":"1465","DOI":"10.1007\/s10994-020-05890-8","volume":"109","author":"N Lavra\u010d","year":"2020","unstructured":"Lavra\u010d, N., \u0160krlj, B., & Robnik-\u0160ikonja, M. (2020). Propositionalization and embeddings: Two sides of the same coin. Machine Learning, 109(7), 1465\u20131507.","journal-title":"Machine Learning"},{"key":"6277_CR26","doi-asserted-by":"publisher","first-page":"167","DOI":"10.3233\/SW-140134","volume":"6","author":"J Lehmann","year":"2015","unstructured":"Lehmann, J., Isele, R., Jakob, M., Jentzsch, A., Kontokostas, D., Mendes, P. N., et al. (2015). Dbpedia\u2014A large-scale, multilingual knowledge base extracted from Wikipedia. Semantic Web, 6, 167.","journal-title":"Semantic Web"},{"key":"6277_CR27","unstructured":"Mahdisoltani, F., Biega, J., & Suchanek, F. (2013). YAGO3: A knowledge base from multilingual Wikipedias. In CIDR."},{"key":"6277_CR28","doi-asserted-by":"publisher","first-page":"636","DOI":"10.14778\/2947618.2947620","volume":"9","author":"W Mann","year":"2016","unstructured":"Mann, W., Augsten, N., & Bouros, P. (2016). An empirical evaluation of set similarity join techniques. Proceedings of the VLDB Endowment, 9, 636.","journal-title":"Proceedings of the VLDB Endowment"},{"key":"6277_CR29","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G. S., & Dean, J. (2013). Distributed representations of words and phrases and their compositionality. In Advances in Neural Information Processing Systems (p. 3111)."},{"key":"6277_CR30","doi-asserted-by":"publisher","unstructured":"MIT Election Data Science Lab. (2018). County presidential election returns 2000\u20132020. Harvard Dataverse. https:\/\/doi.org\/10.7910\/DVN\/VOQCHQ","DOI":"10.7910\/DVN\/VOQCHQ"},{"key":"6277_CR31","unstructured":"Moosavi, S., Samavatian, M.H., Parthasarathy, S., & Ramnath, R. (2019). A countrywide traffic accident dataset. arXiv:1906.05409."},{"key":"6277_CR32","unstructured":"Paulheim, H. (2013). Exploiting linked open data as background knowledge in data mining. In Proceedings of the 2013 international conference on data mining on linked data, DMoLD\u201913 (pp. 1\u201310)."},{"key":"6277_CR33","doi-asserted-by":"crossref","unstructured":"Paulheim, H., & F\u00fcmkranz, J. (2012). Unsupervised generation of data mining features from linked open data. In Proceedings of the 2nd international conference on web intelligence, mining and semantics, WIMS \u201912.","DOI":"10.1145\/2254129.2254168"},{"key":"6277_CR34","doi-asserted-by":"crossref","unstructured":"Pellissier Tanon, T., Weikum, G., & Suchanek, F. (2020). Yago 4: A reason-able knowledge base. In A. Harth, S. Kirrane, A.-C. Ngonga Ngomo, H. Paulheim, A. Rula, A. L. Gentile, et al. (Eds.), The semantic web (pp. 583\u2013596). Springer.","DOI":"10.1007\/978-3-030-49461-2_34"},{"key":"6277_CR35","doi-asserted-by":"crossref","unstructured":"Pinter, Y., Guthrie, R., & Eisenstein, J. (2017). Mimicking word embeddings using subword RNNs. arXiv:1707.06961.","DOI":"10.18653\/v1\/D17-1010"},{"issue":"3","key":"6277_CR36","doi-asserted-by":"publisher","first-page":"399","DOI":"10.3233\/SW-212892","volume":"13","author":"J Portisch","year":"2022","unstructured":"Portisch, J., Heist, N., & Paulheim, H. (2022). Knowledge graph embedding for data mining vs. knowledge graph embedding for link prediction\u2014Two sides of the same coin? Semantic Web, 13(3), 399\u2013422. https:\/\/doi.org\/10.3233\/SW-212892.","journal-title":"Semantic Web"},{"key":"6277_CR37","unstructured":"Ristoski, P., & Paulheim, H. (2014). A comparison of propositionalization strategies for creating features from linked open data. Linked Data for Knowledge Discovery, 6."},{"key":"6277_CR38","doi-asserted-by":"crossref","unstructured":"Ristoski, P., & Paulheim, H. (2016). Rdf2vec: Rdf graph embeddings for data mining. In SEMWEB.","DOI":"10.1007\/978-3-319-46523-4_30"},{"key":"6277_CR39","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.websem.2016.01.001","volume":"36","author":"P Ristoski","year":"2016","unstructured":"Ristoski, P., & Paulheim, H. (2016). Semantic web in data mining and knowledge discovery: A comprehensive survey. Journal of Web Semantics, 36, 1\u201322.","journal-title":"Journal of Web Semantics"},{"key":"6277_CR40","doi-asserted-by":"publisher","first-page":"721","DOI":"10.3233\/SW-180317","volume":"10","author":"P Ristoski","year":"2019","unstructured":"Ristoski, P., Rosati, J., Noia, T. D., De Leone, R., & Paulheim, H. (2019). Rdf2vec: Rdf graph embeddings and their applications. Semantic Web, 10, 721.","journal-title":"Semantic Web"},{"key":"6277_CR41","doi-asserted-by":"crossref","unstructured":"Saeed, M.\u00a0R., & Prasanna, V.\u00a0K. (2018). Extracting entity-specific substructures for RDF graph embedding. In 2018 IEEE international conference on information reuse and integration (IRI) (pp. 378\u2013385).","DOI":"10.1109\/IRI.2018.00063"},{"key":"6277_CR42","doi-asserted-by":"crossref","unstructured":"Silva, Y.\u00a0N., Aref, W.\u00a0G., & Ali, M.\u00a0H. (2010). The similarity join database operator. In International conference on data engineering (ICDE) (p. 892). IEEE.","DOI":"10.1109\/ICDE.2010.5447873"},{"issue":"1","key":"6277_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12859-019-3296-1","volume":"21","author":"R Sousa","year":"2020","unstructured":"Sousa, R., Silva, S., & Pesquita, C. (2020). Evolving knowledge graph similarity for supervised learning in complex biomedical domains. BMC Bioinformatics, 21(1), 1\u201319. https:\/\/doi.org\/10.1186\/s12859-019-3296-1","journal-title":"BMC Bioinformatics"},{"key":"6277_CR44","unstructured":"Sun, Z., Deng, Z. H., Nie, J. Y., & Tang, J. (2019). Rotate: Knowledge graph embedding by relational rotation in complex space. In International Conference on Learning Representations"},{"key":"6277_CR45","doi-asserted-by":"crossref","unstructured":"Vandewiele, G., Steenwinckel, B., Agozzino, T., & Ongenae, F. (2022). pyrdf2vec: A python implementation and extension of rdf2vec. arXiv:2205.02283.","DOI":"10.1007\/978-3-031-33455-9_28"},{"key":"6277_CR46","doi-asserted-by":"crossref","unstructured":"Vandewiele, G., Steenwinckel, B., Bonte, P., Weyns, M., Paulheim, H., Ristoski, P., De Turck, F., & Ongenae, F. (2020). Walk extraction strategies for node embeddings with rdf2vec in knowledge graphs. arXiv:2009.04404.","DOI":"10.1007\/978-3-030-87101-7_8"},{"key":"6277_CR47","doi-asserted-by":"publisher","first-page":"2724","DOI":"10.1109\/TKDE.2017.2754499","volume":"29","author":"Q Wang","year":"2017","unstructured":"Wang, Q., Mao, Z., Wang, B., & Guo, L. (2017). Knowledge graph embedding: A survey of approaches and applications. IEEE Transactions on Knowledge and Data Engineering, 29, 2724.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"6277_CR48","doi-asserted-by":"crossref","unstructured":"Wu, Y., & Wang, Z. (2018). Knowledge graph embedding with numeric attributes of entities. In Workshop on representation learning for NLP (p. 132).","DOI":"10.18653\/v1\/W18-3017"},{"key":"6277_CR49","unstructured":"Yang, B., Yih, W. T., He, X., Gao, J., & Deng, L. (2015). Embedding entities and relations for learning and inference in knowledge bases. In International Conference on Learning Representations."},{"key":"6277_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, L., Zhang, S., & Balog, K. (2019). Table2vec: Neural word and entity embeddings for table population and retrieval. In Proceedings of the 42nd international ACM SIGIR conference on research and development in information retrieval (p. 1029).","DOI":"10.1145\/3331184.3331333"},{"key":"6277_CR51","doi-asserted-by":"crossref","unstructured":"Zheng, D., Song, X., Ma, C., Tan, Z., Ye, Z., Dong, J., Xiong, H., Zhang, Z. and Karypis, G. (2020). Dgl-ke: Training knowledge graph embeddings at scale. In Proceedings of the 43rd international ACM SIGIR conference on research and development in information retrieval (pp. 739\u2013748).","DOI":"10.1145\/3397271.3401172"},{"key":"6277_CR52","unstructured":"Zillow. (2021). Home value index. Retrieved July 31, 2021, from https:\/\/www.zillow.com\/research\/data\/."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-022-06277-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-022-06277-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-022-06277-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,11]],"date-time":"2024-01-11T01:03:42Z","timestamp":1704935022000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-022-06277-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,11]]},"references-count":52,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["6277"],"URL":"https:\/\/doi.org\/10.1007\/s10994-022-06277-7","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2023,1,11]]},"assertion":[{"value":"15 February 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 September 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 November 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 January 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}