{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T03:19:14Z","timestamp":1769311154328,"version":"3.49.0"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319672618","type":"print"},{"value":"9783319672625","type":"electronic"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-67262-5_10","type":"book-chapter","created":{"date-parts":[[2017,8,31]],"date-time":"2017-08-31T17:04:46Z","timestamp":1504199086000},"page":"125-139","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Data Preparation as a Service Based on Apache Spark"],"prefix":"10.1007","author":[{"given":"Nivethika","family":"Mahasivam","sequence":"first","affiliation":[]},{"given":"Nikolay","family":"Nikolov","sequence":"additional","affiliation":[]},{"given":"Dina","family":"Sukhobok","sequence":"additional","affiliation":[]},{"given":"Dumitru","family":"Roman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,9,1]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Atzmueller, M., Oussena, S., Roth-Berghofe, T.: Data preparation for big data analytics: methods and experiences. In: Enterprise Big Data Engineering, Analytics, and Management, pp. 157\u2013170. IGI Global (2016)","DOI":"10.4018\/978-1-5225-0293-7.ch010"},{"issue":"4","key":"10_CR2","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1177\/1473871611415994","volume":"10","author":"S Kandel","year":"2011","unstructured":"Kandel, S., Heer, J., Plaisant, C., Kennedy, J., Ham, F.V., Riche, N.H., Buono, P.: Research directions on data wrangling: visualizations and transformations. Inf. Vis. 10(4), 271\u2013288 (2011)","journal-title":"Inf. Vis."},{"key":"10_CR3","doi-asserted-by":"crossref","unstructured":"Krishnan, S., Franklin, M.J., Goldberg, K., Wu, E.: ActiveClean: an interactive data cleaning framework for modern machine learning. In: International Conference on Management of Data, San Francisco, California, USA. ACM (2016)","DOI":"10.1145\/2882903.2899409"},{"key":"10_CR4","unstructured":"McKinney, W.: Pandas: A Foundational Python Library for DataAnalysis and Statistics. NEM (Networked & Electronic Media) (2011)"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Jackson, C.J., Vijayakumar, V., Quadir, A.M., Bharathi, C.: Survey on programming models and environments for cluster cloud, and grid computing that defends big data. In: Procedia Computer Science, 2nd International Symposium on Big Data and Cloud Computing (ISBCC 2015), pp. 517\u2013523 (2015)","DOI":"10.1016\/j.procs.2015.04.025"},{"issue":"12","key":"10_CR6","doi-asserted-by":"publisher","first-page":"2917","DOI":"10.1109\/TVCG.2012.219","volume":"18","author":"S Kandel","year":"2012","unstructured":"Kandel, S., Paepcke, A., Hellerstein, J., Heer, J.: Enterprise data analysis and visualization: an interview study. IEEE Trans. Vis. Comput. Graph. 18(12), 2917\u20132926 (2012)","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"key":"10_CR7","first-page":"134","volume":"2016","author":"D Sukhobok","year":"2016","unstructured":"Sukhobok, D., Nikolov, N., Pultier, A., Ye, X., Berre, A., Moynihan, R., Roberts, B., Elves\u00e6ter, B., Mahasivam, N., Roman, D.: Tabular data cleaning and linked data generation with Grafterizer. ESWC (Satell. Events) 2016, 134\u2013139 (2016)","journal-title":"ESWC (Satell. Events)"},{"key":"10_CR8","doi-asserted-by":"publisher","unstructured":"Roman, D., Nikolov, N., Putlier, A., Sukhobok, D., Elves\u00e6ter, B., Berre, A.J., Ye, X., Dimitrov, M., Simov, A., Zarev, M., Moynihan, R., Roberts, B., Berlocher, I., Kim, S., Lee, T., Smith, A., Heath, T.: DataGraft: one-stop-shop for open data management. Semantic Web J. (SWJ) \u2013 Interoperability, Usability, Applicability (2017, to appear). doi:10.3233\/SW-170263. Published and printed by IOS Press, ISSN 1570-0844","DOI":"10.3233\/SW-170263"},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"Roman, D., Dimitrov, M., Nikolov, N., Putlier, A., Sukhobok, D., Elves\u00e6ter, B., Berre, A.J., Ye, X., Simov, A., Petkov, Y.: DataGraft: simplifying open data publishing. ESWC (Satell. Events) 2016, 101\u2013106 (2016)","DOI":"10.1007\/978-3-319-47602-5_21"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Roman, D., Dimitrov, M., Nikolov, N., Putlier, A., Elves\u00e6ter, B., Simov, A., Petkov, Y.: DataGraft: a platform for open data publishing. In: The Joint Proceedings of the 4th International Workshop on Linked Media and the 3rd Developers Hackshop, (LIME\/SemDev@ESWC 2016)","DOI":"10.1007\/978-3-319-47602-5_21"},{"key":"10_CR11","unstructured":"Wang, J., Crawl, D., Altintas, I., Tzoumas, K., Markl, V.: Comparison of distributed data-parallelization patterns for big data analysis: a bioinformatics case study. In: Proceedings of the Fourth International Workshop on Data Intensive Computing in the Clouds (DataCloud) (2013)"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Ekanayake, J., Li, H., Zhang, B., Gunarathne, T., Bae, S.-H., Qiu, J., Fox, G.: Twister: a runtime for iterative MapReduce. In: Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing (2010)","DOI":"10.1145\/1851476.1851593"},{"key":"10_CR13","doi-asserted-by":"crossref","unstructured":"Bala, M., Boussaid, O., Alimazighi, Z.: Big-ETL: extracting-transforming-loading approach for big data. In: Proceedings of International Conference on Parallel and Distributed Processing Techniques and Applications, Las Vegas, Neveda, USA (2015)","DOI":"10.4018\/IJDSST.2016100104"},{"key":"10_CR14","unstructured":"Krukowski, A., Kompatsiaris, Y., Papadopoulos, S., et al.: Big and Open Data Position Paper (2013). https:\/\/nem-initiative.org\/wp-content\/uploads\/2013\/11\/NEM-PP-016.pdf"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Akidau, T., Bradshaw, R., Chambers, C., Chernyak, S., Lax, R., Whittle, S.: The dataflow model: a practical approach to balancing correctness, latency, and cost in massive-scale, unbounded, out-of-order data processing. In: Proceedings of the 41st International Conference on Very Large Data Bases, pp. 1792\u20131803, VLDB Endowment, Kohala Coast, Hawaii (2015)","DOI":"10.14778\/2824032.2824076"},{"key":"10_CR16","unstructured":"Sims, M., Kurose, J.F., Lesser, V.R.: Streaming versus batch processing of sensor data in a hazardous weather detection system. In: Proceedings of Second Annual IEEE Communications Society Conference on Sensor and Ad Hoc Communications and Networks (SECON 2005) (2005)"},{"issue":"4","key":"10_CR17","doi-asserted-by":"publisher","first-page":"117","DOI":"10.3390\/computers3040117","volume":"3","author":"S Shahrivari","year":"2014","unstructured":"Shahrivari, S.: Beyond batch processing: towards real-time and streaming big data. Computers 3(4), 117\u2013129 (2014)","journal-title":"Computers"},{"key":"10_CR18","unstructured":"Furche, T., Gottlob, G., Neumayr, B., Sallinger, E.: Data wrangling for big data: towards a lingua franca for data wrangling (2016)"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Armbrust, M., Xin, R.S., Lian, C., Huai, Y., Liu, D., Bradley, J.K., Meng, X., Kaftan, T., Franklin, M.J., Ghodsi, A., Zaharia, M.: Spark SQL: relational data processing in spark. In: Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, pp. 1383\u20131394. ACM (2015)","DOI":"10.1145\/2723372.2742797"},{"key":"10_CR20","unstructured":"Zaharia, M., Chowdhury, M., Das, T., Dave, A., Ma, J., McCauley, M., Franklin, M.J., Shenker, S., Stoica, I.: Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: Proceedings of the 9th USENIX Conference on Networked Systems Design and Implementation, p. 2. USENIX Association (2012)"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Sukhobok, D., Nikolov, N., Roman, D.: Tabular data anomaly patterns. In: 3rd International Conference on Big Data Innovations and Applications. Innovate-Data 2017 (2017, in press)","DOI":"10.1109\/Innovate-Data.2017.10"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Riazi, S.: SparkGalaxy: workflow-based big data processing (2016)","DOI":"10.1109\/BigData.2016.7840993"},{"issue":"4","key":"10_CR23","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1145\/2935694.2935702","volume":"44","author":"H Wang","year":"2016","unstructured":"Wang, H., Li, M., Bu, Y., Li, J., Gao, H., Zhang, J.: Cleanix: a parallel big data cleaning system. ACM SIGMOD Rec. 44(4), 35\u201340 (2016)","journal-title":"ACM SIGMOD Rec."},{"issue":"11","key":"10_CR24","first-page":"66","volume":"3","author":"M Kaur","year":"2015","unstructured":"Kaur, M., Dhaliwal, G.: Performance comparison of map reduce and Apache Spark. Int. J. Comput. Sci. Eng. 3(11), 66\u201369 (2015)","journal-title":"Int. J. Comput. Sci. Eng."}],"container-title":["Lecture Notes in Computer Science","Service-Oriented and Cloud Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-67262-5_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T10:30:05Z","timestamp":1692959405000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-67262-5_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319672618","9783319672625"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-67262-5_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"1 September 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ESOCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Service-Oriented and Cloud Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Oslo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Norway","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"esocc2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/esocc2017.ifi.uio.no\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}