{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T16:27:16Z","timestamp":1759336036668,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319915623"},{"type":"electronic","value":"9783319915630"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-91563-0_29","type":"book-chapter","created":{"date-parts":[[2018,5,16]],"date-time":"2018-05-16T05:53:18Z","timestamp":1526449998000},"page":"474-489","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":30,"title":["KAYAK: A Framework for Just-in-Time Data Preparation in a Data Lake"],"prefix":"10.1007","author":[{"given":"Antonio","family":"Maccioni","sequence":"first","affiliation":[]},{"given":"Riccardo","family":"Torlone","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,5,17]]},"reference":[{"key":"29_CR1","unstructured":"CKAN: The open source data portal software. http:\/\/ckan.org\/. Accessed Nov 2017"},{"key":"29_CR2","unstructured":"Tamr. http:\/\/www.tamr.com\/. Accessed Nov 2017"},{"key":"29_CR3","unstructured":"Trifacta. https:\/\/www.trifacta.com\/. Accessed Nov 2017"},{"key":"29_CR4","doi-asserted-by":"crossref","unstructured":"Agarwal, S., Mozafari, B., Panda, A., Milner, H., Madden, S., Stoica, I.: BlinkDB: queries with bounded errors and bounded response times on very large data. In: EuroSys, pp. 29\u201342 (2013)","DOI":"10.1145\/2465351.2465355"},{"issue":"12","key":"29_CR5","first-page":"1916","volume":"8","author":"AP Bhardwaj","year":"2015","unstructured":"Bhardwaj, A.P., Deshpande, A., Elmore, A.J., Karger, D.R., Madden, S., Parameswaran, A.G., Subramanyam, H., Wu, E., Zhang, R.: Collaborative data analytics with DataHub. PVLDB 8(12), 1916\u20131927 (2015)","journal-title":"PVLDB"},{"key":"29_CR6","unstructured":"Deng, D., Fernandez, R.C., Abedjan, Z., Wang, S., Stonebraker, M., Elmagarmid, A.K., Ilyas, I.F., Madden, S., Ouzzani, M., Tang, N.: The data civilizer system. In: CIDR (2017)"},{"key":"29_CR7","unstructured":"Ehrlich, J., Roick, M., Schulze, L., Zwiener, J., Papenbrock, T., Naumann, F.: Holistic data profiling: simultaneous discovery of various metadata. In: EDBT, pp. 305\u2013316 (2016)"},{"key":"29_CR8","unstructured":"Furche, T., Gottlob, G., Libkin, L., Orsi, G., Paton, N.W.: Data wrangling for big data: challenges and opportunities. In: EDBT, pp. 473\u2013478 (2016)"},{"key":"29_CR9","doi-asserted-by":"crossref","unstructured":"Hai, R., Geisler, S., Quix, C.: Constance: an intelligent data lake system. In: SIGMOD, pp. 2097\u20132100 (2016)","DOI":"10.1145\/2882903.2899389"},{"key":"29_CR10","doi-asserted-by":"crossref","unstructured":"Halevy, A.Y., Korn, F., Noy, N.F., Olston, C., Polyzotis, N., Roy, S., Whang, S.E.: Goods: organizing Google\u2019s datasets. In: SIGMOD (2016)","DOI":"10.1145\/2882903.2903730"},{"issue":"2","key":"29_CR11","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/253262.253291","volume":"26","author":"Joseph M. Hellerstein","year":"1997","unstructured":"Hellerstein, J.M., Haas, P.J., Wang, H.J.: Online aggregation. In: SIGMOD, pp. 171\u2013182 (1997)","journal-title":"ACM SIGMOD Record"},{"key":"29_CR12","unstructured":"Hellerstein, J.M., Sreekanti, V., Gonzalez, J.E., Dalton, J., Dey, A., Nag, S., Ramachandran, K., Arora, S., Bhattacharyya, A., Das, S., Donsky, M., Fierro, G., She, C., Steinbach, C., Subramanian, V., Sun, E.: Ground: a data context service. In: CIDR (2017)"},{"key":"29_CR13","unstructured":"Heudecker, N., White, A.: The data lake fallacy: all water and little substance. Gartner Report G 264950 (2014)"},{"key":"29_CR14","doi-asserted-by":"crossref","unstructured":"Ilyas, I.F., Markl, V., Haas, P.J., Brown, P., Aboulnaga, A.: CORDS: automatic discovery of correlations and soft functional dependencies. In: SIGMOD, pp. 647\u2013658 (2004)","DOI":"10.1145\/1007568.1007641"},{"issue":"12","key":"29_CR15","first-page":"1853","volume":"10","author":"A Maccioni","year":"2017","unstructured":"Maccioni, A., Torlone, R.: Crossing the finish line faster when paddling the data lake with KAYAK. PVLDB 10(12), 1853\u20131856 (2017)","journal-title":"PVLDB"},{"issue":"12","key":"29_CR16","first-page":"1860","volume":"8","author":"T Papenbrock","year":"2015","unstructured":"Papenbrock, T., Bergmann, T., Finke, M., Zwiener, J., Naumann, F.: Data profiling with metanome. PVLDB 8(12), 1860\u20131863 (2015)","journal-title":"PVLDB"},{"issue":"10","key":"29_CR17","first-page":"1082","volume":"8","author":"T Papenbrock","year":"2015","unstructured":"Papenbrock, T., Ehrlich, J., Marten, J., Neubert, T., Rudolph, J., Sch\u00f6nberg, M., Zwiener, J., Naumann, F.: Functional dependency discovery: an experimental evaluation of seven algorithms. PVLDB 8(10), 1082\u20131093 (2015)","journal-title":"PVLDB"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"Papenbrock, T., Naumann, F.: A hybrid approach to functional dependency discovery. In: SIGMOD, pp. 821\u2013833 (2016)","DOI":"10.1145\/2882903.2915203"},{"issue":"3","key":"29_CR19","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/MCSE.2007.53","volume":"9","author":"F P\u00e9rez","year":"2007","unstructured":"P\u00e9rez, F., Granger, B.E.: IPython: a system for interactive scientific computing. Comput. Sci. Eng. 9(3), 21\u201329 (2007)","journal-title":"Comput. Sci. Eng."},{"issue":"9","key":"29_CR20","first-page":"898","volume":"8","author":"N Potti","year":"2015","unstructured":"Potti, N., Patel, J.M.: DAQ: a new paradigm for approximate query processing. PVLDB 8(9), 898\u2013909 (2015)","journal-title":"PVLDB"},{"key":"29_CR21","unstructured":"Sarma, A.D., Fang, L., Gupta, N., Halevy, A.Y., Lee, H., Wu, F., Xin, R., Yu, C.: Finding related tables. In: SIGMOD (2012)"},{"key":"29_CR22","unstructured":"Stonebraker, M., Bruckner, D., Ilyas, I.F., Beskales, G., Cherniack, M., Zdonik, S.B., Pagan, A., Xu, S.: Data curation at scale: the data tamer system. In: CIDR (2013)"},{"key":"29_CR23","unstructured":"Terrizzano, I., Schwarz, P.M., Roth, M., Colino, J.E.: Data wrangling: the challenging journey from the wild to the lake. In: CIDR (2015)"},{"issue":"11","key":"29_CR24","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1145\/2934664","volume":"59","author":"M Zaharia","year":"2016","unstructured":"Zaharia, M., Xin, R.S., Wendell, P., Das, T., Armbrust, M., Dave, A., Meng, X., Rosen, J., Venkataraman, S., Franklin, M.J., Ghodsi, A., Gonzalez, J., Shenker, S., Stoica, I.: Apache spark: a unified engine for big data processing. Commun. ACM 59(11), 56\u201365 (2016)","journal-title":"Commun. ACM"}],"container-title":["Lecture Notes in Computer Science","Advanced Information Systems Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-91563-0_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T18:39:06Z","timestamp":1710268746000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-91563-0_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319915623","9783319915630"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-91563-0_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"17 May 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CAiSE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Information Systems Engineering","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tallinn","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Estonia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 June 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 June 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"caise2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/caise2018.ut.ee","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}