{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T01:20:09Z","timestamp":1769822409222,"version":"3.49.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,15]],"date-time":"2021-12-15T00:00:00Z","timestamp":1639526400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,12,15]],"date-time":"2021-12-15T00:00:00Z","timestamp":1639526400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,15]],"date-time":"2021-12-15T00:00:00Z","timestamp":1639526400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,15]]},"DOI":"10.1109\/bigdata52589.2021.9671742","type":"proceedings-article","created":{"date-parts":[[2022,1,13]],"date-time":"2022-01-13T15:39:16Z","timestamp":1642088356000},"page":"3141-3146","source":"Crossref","is-referenced-by-count":6,"title":["Training Data Reduction for Performance Models of Data Analytics Jobs in the Cloud"],"prefix":"10.1109","author":[{"given":"Jonathan","family":"Will","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Onur","family":"Arslan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jonathan","family":"Bader","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dominik","family":"Scheinert","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lauritz","family":"Thamsen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref30","article-title":"Scikit-learn: Machine Learning in Python","volume":"12","author":"pedregosa","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref10","article-title":"Enel: Context-Aware Dynamic Scaling of Distributed Dataflow Jobs using Graph Propagation","author":"scheinert","year":"2021","journal-title":"IEEE International Performance Computing and Communications Conference"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/BigData50022.2020.9377994"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3298981"},{"key":"ref13","article-title":"Decentralized Federated Learning Preserves Model and Data Privacy","author":"wittkopp","year":"2020","journal-title":"International Conference on Service-Oriented Computing"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2810103.2813677"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2017.41"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-014-0524-2"},{"key":"ref17","article-title":"Introduction to k Nearest Neighbour Classification and Condensed Nearest Neighbour Data Reduction","author":"sutton","year":"2012","journal-title":"University lectures University of Leicester"},{"key":"ref18","article-title":"Comparative Study Among Data Reduction Techniques over Classification Accuracy","author":"el-hasnony","year":"2015","journal-title":"International Journal of Computer Applications"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1327452.1327492"},{"key":"ref28","article-title":"Morpheus: Towards Automated SLOs for Enterprise Clusters","author":"jyothi","year":"2016","journal-title":"USENIX Symp on Operating Systems Design & Implementation"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2371536.2371547"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E52221.2021.00041"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2987550.2987566"},{"key":"ref6","article-title":"Cherrypick: Adaptively Unearthing the Best Cloud Con-figurations for Big Data Analytics","author":"alipourfard","year":"2017","journal-title":"14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)"},{"key":"ref29","article-title":"Reoptimizing data parallel computing","author":"agarwal","year":"2012","journal-title":"USENIX Symposium on Networked Systems Design and Implementation (NSDI)"},{"key":"ref5","article-title":"Ernest: Efficient Performance Prediction for Large-Scale Advanced Analytics","author":"venkataraman","year":"2016","journal-title":"USENIX Symposium on Networked Systems Design and Implementation"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IC2E52221.2021.00018"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/PCCC.2016.7820629"},{"key":"ref2","article-title":"Apache Flink: Stream and Batch Processing in a Single Engine","volume":"36","author":"carbone","year":"2015","journal-title":"Bulletin of the IEEE Computer Society Technical Committee on Data Engineering"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00052"},{"key":"ref1","article-title":"Spark: Cluster Computing with Working Sets","volume":"10","author":"zaharia","year":"2010","journal-title":"HotCloud"},{"key":"ref20","article-title":"Tarema: Adaptive Resource Allocation for Scalable Scientific Work-flows in Heterogeneous Clusters","author":"bader","year":"2021","journal-title":"IEEE International Conference on Big Data"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2018.00070"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CLOUD.2018.00058"},{"key":"ref24","article-title":"Tuneful: An Online Significance-aware Configuration Tuner for Big Data Analytics","author":"fekry","year":"2020"},{"key":"ref23","article-title":"Scout: An Experienced Guide to Find the Best Cloud Configuration","author":"hsu","year":"2018"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/BigData52589.2021.9671275"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.23919\/CNSM46954.2019.9012752"}],"event":{"name":"2021 IEEE International Conference on Big Data (Big Data)","location":"Orlando, FL, USA","start":{"date-parts":[[2021,12,15]]},"end":{"date-parts":[[2021,12,18]]}},"container-title":["2021 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9671263\/9671273\/09671742.pdf?arnumber=9671742","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T12:55:36Z","timestamp":1652187336000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9671742\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,15]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/bigdata52589.2021.9671742","relation":{},"subject":[],"published":{"date-parts":[[2021,12,15]]}}}