{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T17:38:37Z","timestamp":1778693917769,"version":"3.51.4"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031632266","type":"print"},{"value":"9783031632273","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-63227-3_12","type":"book-chapter","created":{"date-parts":[[2024,6,22]],"date-time":"2024-06-22T08:02:02Z","timestamp":1719043322000},"page":"177-189","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Scalable Data Profiling for Quality Analytics Extraction"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8054-5670","authenticated-orcid":false,"given":"Anastasios","family":"Nikolakopoulos","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3865-2277","authenticated-orcid":false,"given":"Efthymios","family":"Chondrogiannis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0460-4648","authenticated-orcid":false,"given":"Efstathios","family":"Karanastasis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7633-095X","authenticated-orcid":false,"given":"Mar\u00eda Jos\u00e9 L\u00f3pez","family":"Osa","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1761-5170","authenticated-orcid":false,"given":"Jordi Arjona","family":"Aroca","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1131-5011","authenticated-orcid":false,"given":"Michalis","family":"Kefalogiannis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vasiliki","family":"Apostolopoulou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Efstathia","family":"Deligeorgi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7677-7957","authenticated-orcid":false,"given":"Vasileios","family":"Siopidis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Theodora","family":"Varvarigou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,6,23]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Abedjan, Z., Golab, L., Naumann, F.: Data profiling: a tutorial. In: Proceedings of the 2017 ACM International Conference on Management of Data, pp. 1747\u20131751 (2017)","DOI":"10.1145\/3035918.3054772"},{"key":"12_CR2","unstructured":"Agrawal, Y.: The accelerating pace of technological trends \u2013 adapting to market dynamics as an it professionals \u2013 web article (2023). https:\/\/www.linkedin.com\/pulse\/accelerating-pace-technological-trends-adapting-market-yash-agrawal"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Altendeitering, M., Fraunhofer, I., Guggenberger, T.M.: Data quality tools: towards a software reference architecture (2024)","DOI":"10.24251\/HICSS.2023.740"},{"key":"12_CR4","unstructured":"Apache: Apache flink \u2013 framework. https:\/\/flink.apache.org"},{"key":"12_CR5","unstructured":"Apache: Apache spark \u2013 framework. https:\/\/spark.apache.org"},{"key":"12_CR6","unstructured":"Apache: Apache storm \u2013 framework. https:\/\/storm.apache.org"},{"key":"12_CR7","unstructured":"Apache: Pyspark overview \u2013 introduction. https:\/\/spark.apache.org\/docs\/latest\/api\/python\/index.html"},{"key":"12_CR8","doi-asserted-by":"publisher","unstructured":"Couto, J.C., Damasio, J., Bordini, R., Ruiz, D.: New trends in big data profiling. In: Science and Information Conference, pp. 808\u2013825. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-10461-9_55","DOI":"10.1007\/978-3-031-10461-9_55"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Dai, W., Wardlaw, I., Cui, Y., Mehdi, K., Li, Y., Long, J.: Data profiling technology of data governance regarding big data: review and rethinking. In: Information Technology: New Generations: 13th International Conference on Information Technology, pp. 439\u2013450. Springer (2016)","DOI":"10.1007\/978-3-319-32467-8_39"},{"key":"12_CR10","unstructured":"Duarte, F.: Amount of data created daily - web article (2024). https:\/\/ explodingtopics.com\/blog\/data-generated-per-day"},{"key":"12_CR11","unstructured":"Economist: The world\u2019s most valuable resource is no longer oil, but data - web article (2017). https:\/\/www.economist.com\/leaders\/2017\/05\/06\/the-worlds-most-valuable-resource-is-no-longer-oil-but-data"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Elbaghazaoui, B.E., Amnai, M., Semmouri, A.: Data profiling over big data area: a survey of big data profiling: state-of-the-art, use cases and challenges. In: Intelligent Systems in Big Data, Semantic Web and Machine Learning, pp. 111\u2013123. Springer (2021)","DOI":"10.1007\/978-3-030-72588-4_8"},{"issue":"1","key":"12_CR13","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s41044-016-0020-2","volume":"2","author":"D Garc\u00eda-Gil","year":"2017","unstructured":"Garc\u00eda-Gil, D., Ram\u00edrez-Gallego, S., Garc\u00eda, S., Herrera, F.: A comparison on scalability for batch big data processing on apache spark and apache flink. Big Data Analytics 2(1), 1\u201311 (2017)","journal-title":"Big Data Analytics"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Gupta, H.K., Parveen, R.: Comparative study of big data frameworks. In: 2019 International Conference on Issues and Challenges in Intelligent Computing Techniques (ICICT). vol. 1, pp. 1\u20134. IEEE (2019)","DOI":"10.1109\/ICICT46931.2019.8977680"},{"key":"12_CR15","unstructured":"IBM: What is data profiling? - web article. https:\/\/www.ibm.com\/topics\/data-profiling"},{"key":"12_CR16","doi-asserted-by":"publisher","first-page":"72713","DOI":"10.1109\/ACCESS.2020.2988120","volume":"8","author":"Z Liu","year":"2020","unstructured":"Liu, Z., Zhang, A.: Sampling for big data profiling: a survey. IEEE Access 8, 72713\u201372726 (2020)","journal-title":"IEEE Access"},{"key":"12_CR17","unstructured":"Liu, Z., Zhang, A.: A survey on sampling and profiling over big data (technical report). arXiv preprint arXiv:2005.05079 (2020)"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Marcu, O.C., Costan, A., Antoniu, G., P\u00e9rez-Hern\u00e1ndez, M.S.: Spark versus flink: understanding performance in big data analytics frameworks. In: 2016 IEEE International Conference on Cluster Computing (CLUSTER), pp. 433\u2013442. IEEE (2016)","DOI":"10.1109\/CLUSTER.2016.22"},{"key":"12_CR19","doi-asserted-by":"publisher","unstructured":"Marinakis, A., et al.: Efficient data management and interoperability middleware in business-oriented smart port use cases. In: IFIP International Conference on Artificial Intelligence Applications and Innovations, pp. 108\u2013119. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-08341-9_10","DOI":"10.1007\/978-3-031-08341-9_10"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Nagpal, A., Gabrani, G.: Python for data analytics, scientific and technical applications. In: 2019 Amity International Conference on Artificial Intelligence (AICAI), pp. 140\u2013145. IEEE (2019)","DOI":"10.1109\/AICAI.2019.8701341"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Nikiforova, A.: Definition and evaluation of data quality: User-oriented data object- driven approach to data quality assessment. Baltic J. Mod. Comput. 8(3) (2020)","DOI":"10.22364\/bjmc.2020.8.3.02"},{"issue":"11","key":"12_CR22","doi-asserted-by":"publisher","first-page":"218","DOI":"10.3390\/computers12110218","volume":"12","author":"A Nikolakopoulos","year":"2023","unstructured":"Nikolakopoulos, A., et al.: Bigdam: Efficient big data management and interoperability middleware for seaports as critical infrastructures. Computers 12(11), 218 (2023)","journal-title":"Computers"},{"key":"12_CR23","unstructured":"OTE: Ote group of companies. https:\/\/www.cosmote.gr\/cs\/otegroup\/\\en\/omilos{_}ote.html"},{"key":"12_CR24","unstructured":"van Rossum, G.: Python - programming language, https:\/\/www.python.org"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Taleb, I., Serhani, M.A., Dssouli, R.: Big data quality: a data quality profiling model. In: World Congress on Services, pp. 61\u201377. Springer (2019)","DOI":"10.1007\/978-3-030-23381-5_5"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Veiga, J., Exp\u00f3sito, R.R., Pardo, X.C., Taboada, G.L., Tourifio, J.: Performance evaluation of big data frameworks for large-scale data analytics. In: 2016 IEEE International Conference on Big Data (Big Data), pp. 424\u2013431. IEEE (2016)","DOI":"10.1109\/BigData.2016.7840633"}],"container-title":["IFIP Advances in Information and Communication Technology","Artificial Intelligence Applications and Innovations. AIAI 2024 IFIP WG 12.5 International Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-63227-3_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T12:21:27Z","timestamp":1732278087000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-63227-3_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031632266","9783031632273"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-63227-3_12","relation":{},"ISSN":["1868-4238","1868-422X"],"issn-type":[{"value":"1868-4238","type":"print"},{"value":"1868-422X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"23 June 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"IFIP International Conference on Artificial Intelligence Applications and Innovations","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Corfu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Greece","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 June 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 June 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aiai2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ifipaiai.org\/2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}