{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:06:35Z","timestamp":1757617595075,"version":"3.44.0"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031834714"},{"type":"electronic","value":"9783031834721"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-83472-1_10","type":"book-chapter","created":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T12:14:01Z","timestamp":1742040841000},"page":"141-155","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["About Relationships in\u00a0Data Lakes"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1481-4662","authenticated-orcid":false,"given":"Ahlame","family":"Diouan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eric","family":"Ferey","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1491-384X","authenticated-orcid":false,"given":"J\u00e9r\u01d2me","family":"Darmont","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0494-0169","authenticated-orcid":false,"given":"Sabine","family":"Loudcher","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,3,16]]},"reference":[{"key":"10_CR1","unstructured":"Al-Serafi, A.M.M.: Dataset proximity mining for supporting schema matching and data lake governance. Ph.D. thesis, Universitat Polit\u00e8cnica de Catalunya (2021)"},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Alserafi, A., Calders, T., Abell\u00f3, A., Romero, O.: DS-prox: dataset proximity mining for governing the data lake. In: Proceedings of the 10th International Conference on Similarity Search & Applications (SISAP), pp. 284\u2013299 (2017)","DOI":"10.1007\/978-3-319-68474-1_20"},{"issue":"12","key":"10_CR3","doi-asserted-by":"publisher","first-page":"1942","DOI":"10.14778\/3229863.3236230","volume":"11","author":"A Beheshti","year":"2018","unstructured":"Beheshti, A., Benatallah, B., Nouri, R., Tabebordbar, A.: CoreKG: a knowledge lake service. Proc. VLDB Endow. 11(12), 1942\u20131945 (2018)","journal-title":"Proc. VLDB Endow."},{"key":"10_CR4","unstructured":"Deng, D., et al.: The data civilizer system. In: Proceedings of the 8th Biennial Conference on Innovative Data Systems Research (CIDR) (2017)"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Diamantini, C., et al.: A new metadata model to uniformly handle heterogeneous data lake sources. In: Proceedings of the 22nd ADBIS Short Papers & Workshops, AI*QA, BIGPMED, CSACDB, M2U, BigDataMAPS, ISTREND, Doctoral Consortium, pp. 165\u2013177 (2018)","DOI":"10.1007\/978-3-030-00063-9_17"},{"key":"10_CR6","unstructured":"Dixon, J.: Pentaho, Hadoop, and Data Lakes (2010). https:\/\/jamesdixon.wordpress.com\/2010\/10\/14\/pentaho-hadoop-and-data-lakes\/"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Eichler, R., et al.: HANDLE - a generic metadata model for data lakes. In: Proceedings of the 22nd International Conference on Big Data Analytics & Knowledge Discovery (DAWAK), pp. 73\u201388 (2020)","DOI":"10.1007\/978-3-030-59065-9_7"},{"issue":"11","key":"10_CR8","doi-asserted-by":"publisher","first-page":"3377","DOI":"10.14778\/3611479.3611533","volume":"16","author":"MY Eltabakh","year":"2023","unstructured":"Eltabakh, M.Y., Kunjir, M., Elmagarmid, A., Ahmad, M.S.: Cross modal data discovery over structured and unstructured data lakes. Proc. VLDB Endow. 16(11), 3377\u20133390 (2023)","journal-title":"Proc. VLDB Endow."},{"issue":"7","key":"10_CR9","doi-asserted-by":"publisher","first-page":"1726","DOI":"10.14778\/3587136.3587146","volume":"16","author":"G Fan","year":"2022","unstructured":"Fan, G., et al.: Semantics-aware dataset discovery from data lakes with contextualized column-based representation learning. Proc. VLDB Endow. 16(7), 1726\u20131739 (2022)","journal-title":"Proc. VLDB Endow."},{"issue":"4","key":"10_CR10","doi-asserted-by":"publisher","first-page":"425","DOI":"10.1007\/s42979-023-01828-8","volume":"4","author":"AA Fernandes","year":"2023","unstructured":"Fernandes, A.A., et al.: Data preparation: a technological perspective and review. SN Comput. Sci. 4(4), 425 (2023)","journal-title":"SN Comput. Sci."},{"key":"10_CR11","unstructured":"Fernandez, R.C., et al.: Aurum: a data discovery system. In: Proceedings of the 34th IEEE International Conference on Data Engineering (ICDE), pp. 1001\u20131012 (2018)"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Hai, R., Geisler, S., Quix, C.: Constance: an intelligent data lake system. In: Proceedings of the ACM International Conference on Management of Data (SIGMOD), pp. 2097\u20132100 (2016)","DOI":"10.1145\/2882903.2899389"},{"issue":"3","key":"10_CR13","first-page":"5","volume":"39","author":"AY Halevy","year":"2016","unstructured":"Halevy, A.Y., et al.: Managing Google\u2019s data lake: an overview of the Goods system. IEEE Data Eng. Bull. 39(3), 5\u201314 (2016)","journal-title":"IEEE Data Eng. Bull."},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Hoseini, S., Theissen-Lipp, J., Quix, C.: A survey on semantic data management as intersection of ontology-based data access, semantic modeling and data lakes. J. Web Semant. 100819 (2024)","DOI":"10.1016\/j.websem.2024.100819"},{"issue":"6","key":"10_CR15","doi-asserted-by":"publisher","first-page":"826","DOI":"10.14778\/3380750.3380754","volume":"13","author":"R Huang","year":"2020","unstructured":"Huang, R., et al.: Effective and efficient retrieval of structured entities. Proc. VLDB Endow. 13(6), 826\u2013839 (2020)","journal-title":"Proc. VLDB Endow."},{"issue":"1","key":"10_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3588929","volume":"1","author":"Y Kaminsky","year":"2023","unstructured":"Kaminsky, Y., Pena, E.H., Naumann, F.: Discovering similarity inclusion dependencies. Proc. ACM Manag. Data 1(1), 1\u201324 (2023)","journal-title":"Proc. ACM Manag. Data"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Liu, J., Sun, S., Nargesian, F.: Causal dataset discovery with large language models. In: Proceedings of the ACM SIGMOD Workshop on Human-In-the-Loop Data Analytics (HILDA), pp.\u00a01\u20138 (2024)","DOI":"10.1145\/3665939.3665968"},{"key":"10_CR18","doi-asserted-by":"crossref","unstructured":"Miloslavskaya, N., Tolstoy, A.: Big data, fast data and data lake concepts. In: Proceedings of the 7th Annual International Conference on Biologically Inspired Cognitive Architectures (BICA), pp. 300\u2013305 (2016)","DOI":"10.1016\/j.procs.2016.07.439"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Moher, D., et al.: Preferred reporting items for systematic review and meta-analysis protocols (PRISMA-P) 2015 statement. Syst. Rev. 4(1) (2015)","DOI":"10.1186\/2046-4053-4-1"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Ravat, F., Zhao, Y.: Data lakes: trends and perspectives. In: Proceedings of the 30th International Conference on Database & Expert Systems Applications (DEXA), vol.\u00a0I, pp. 304\u2013313 (2019)","DOI":"10.1007\/978-3-030-27615-7_23"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Ravat, F., Zhao, Y.: Metadata management for data lakes. In: Proceedings of the 23rd ADBIS Short Papers & Workshops: BBIGAP, QAUCA, SemBDM, SIMPDA, M2P, MADEISD, and Doctoral Consortium, pp. 37\u201344 (2019)","DOI":"10.1007\/978-3-030-30278-8_5"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Santos, A., et al.: Correlation sketches for approximate join-correlation queries. In: Proceedings of the ACM International Conference on Management of Data (SIGMOD), pp. 1531\u20131544 (2021)","DOI":"10.1145\/3448016.3458456"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Sawadogo, P.N., et al.: Metadata systems for data lakes: models and features. In: Proceedings of the 23rd ADBIS Short Papers & Workshops: BBIGAP, QAUCA, SemBDM, SIMPDA, M2P, MADEISD, and Doctoral Consortium, pp. 440\u2013451 (2019)","DOI":"10.1007\/978-3-030-30278-8_43"},{"key":"10_CR24","unstructured":"Scholly, E., et al.: Coining goldMEDAL: a new contribution to data lake generic metadata modeling. In: Proceedings of the 23rd International Workshop on Design, Optimization, Languages and Analytical Processing of Big Data (DOLAP), vol.\u00a02840, pp. 31\u201340 (2021)"},{"issue":"4","key":"10_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3626762","volume":"1","author":"R Shah","year":"2023","unstructured":"Shah, R., et al.: R2D2: reducing redundancy and duplication in data lakes. Proc. ACM Manag. Data 1(4), 1\u201325 (2023)","journal-title":"Proc. ACM Manag. Data"},{"issue":"10","key":"10_CR26","doi-asserted-by":"publisher","first-page":"2659","DOI":"10.14778\/3603581.3603602","volume":"16","author":"B Youngmann","year":"2023","unstructured":"Youngmann, B., Cafarella, M., Salimi, B., Zeng, A.: Causal data integration. Proc. VLDB Endow. 16(10), 2659\u20132665 (2023)","journal-title":"Proc. VLDB Endow."}],"container-title":["Lecture Notes in Computer Science","Database Engineered Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-83472-1_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T07:47:45Z","timestamp":1757144865000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-83472-1_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031834714","9783031834721"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-83472-1_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"16 March 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"IDEAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Database Engineered Applications Symposium","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bayonne","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ideas-12024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.sigappfr.org\/ideas2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}