{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T23:51:29Z","timestamp":1781135489850,"version":"3.54.1"},"publisher-location":"Cham","reference-count":56,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030114039","type":"print"},{"value":"9783030114046","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-11404-6_4","type":"book-chapter","created":{"date-parts":[[2019,1,29]],"date-time":"2019-01-29T08:02:59Z","timestamp":1548748979000},"page":"42-57","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Benchmarking Distributed Data Processing Systems for Machine Learning Workloads"],"prefix":"10.1007","author":[{"given":"Christoph","family":"Boden","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tilmann","family":"Rabl","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sebastian","family":"Schelter","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Volker","family":"Markl","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2019,1,30]]},"reference":[{"key":"4_CR1","unstructured":"https:\/\/mahout.apache.org\/"},{"key":"4_CR2","unstructured":"https:\/\/mlperf.org\/"},{"key":"4_CR3","unstructured":"https:\/\/www.kaggle.com\/surveys\/2017"},{"key":"4_CR4","unstructured":"Abadi, M., et al.: TensorFlow: a system for large-scale machine learning. In: OSDI, pp. 265\u2013283. USENIX Association (2016)"},{"issue":"6","key":"4_CR5","doi-asserted-by":"publisher","first-page":"939","DOI":"10.1007\/s00778-014-0357-y","volume":"23","author":"A Alexandrov","year":"2014","unstructured":"Alexandrov, A., et al.: The stratosphere platform for big data analytics. VLDB J. 23(6), 939\u2013964 (2014)","journal-title":"VLDB J."},{"key":"4_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1007\/978-3-319-15350-6_4","volume-title":"Performance Characterization and Benchmarking. Traditional to Big Data","author":"C Baru","year":"2015","unstructured":"Baru, C., et al.: Discussion of BigBench: a proposed industry standard performance benchmark for big data. In: Nambiar, R., Poess, M. (eds.) TPCTC 2014. LNCS, vol. 8904, pp. 44\u201363. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-15350-6_4"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Bell, R.M., Koren, Y.: Scalable collaborative filtering with jointly derived neighborhood interpolation weights. In: Seventh IEEE International Conference on Data Mining (ICDM 2007), pp. 43\u201352, October 2007","DOI":"10.1109\/ICDM.2007.90"},{"key":"4_CR8","unstructured":"Boden, C., Rabl, T., Markl, V.: Distributed machine learning-but at what cost?"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Boden, C., Spina, A., Rabl, T., Markl, V.: Benchmarking data flow systems for scalable machine learning. In: Proceedings of the 4th Algorithms and Systems on MapReduce and Beyond, BeyondMR 2017, pp. 5:1\u20135:10. ACM, New York (2017)","DOI":"10.1145\/3070607.3070612"},{"issue":"12","key":"4_CR10","doi-asserted-by":"publisher","first-page":"1694","DOI":"10.14778\/3137765.3137775","volume":"10","author":"J-H B\u00f6se","year":"2017","unstructured":"B\u00f6se, J.-H., et al.: Probabilistic demand forecasting at scale. Proc. VLDB Endow. 10(12), 1694\u20131705 (2017)","journal-title":"Proc. VLDB Endow."},{"key":"4_CR11","unstructured":"Brants, T., Popat, A.C., Xu, P., Och, F.J., Dean, J.: Large language models in machine translation. In: EMNLP, pp. 858\u2013867 (2007)"},{"issue":"1","key":"4_CR12","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/S0169-7552(98)00110-X","volume":"30","author":"S Brin","year":"1998","unstructured":"Brin, S., Page, L.: The anatomy of a large-scale hypertextual Web search engine. Comput. Netw. ISDN Syst. 30(1), 107\u2013117 (1998). Proceedings of the Seventh International World Wide Web Conference","journal-title":"Comput. Netw. ISDN Syst."},{"issue":"1\u20132","key":"4_CR13","doi-asserted-by":"publisher","first-page":"285","DOI":"10.14778\/1920841.1920881","volume":"3","author":"Y Bu","year":"2010","unstructured":"Bu, Y., Howe, B., Balazinska, M., Ernst, M.D.: Haloop: efficient iterative data processing on large clusters. Proc. VLDB Endow. 3(1\u20132), 285\u2013296 (2010)","journal-title":"Proc. VLDB Endow."},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Cai, Z., Gao, Z.J., Luo, S., Perez, L.L., Vagena, Z., Jermaine, C.: A comparison of platforms for implementing and running very large scale machine learning algorithms. In: Proceedings of the 2014 ACM SIGMOD International Conference on Management of Data, SIGMOD 2014, pp. 1371\u20131382 (2014)","DOI":"10.1145\/2588555.2593680"},{"key":"4_CR15","unstructured":"Caninil, K.: Sibyl: a system for large scale supervised machine learning (2012)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Caruana, R., Karampatziakis, N., Yessenalina, A.: An empirical evaluation of supervised learning in high dimensions. In: Proceedings of the 25th International Conference on Machine Learning, ICML 2008, pp. 96\u2013103. ACM, New York (2008)","DOI":"10.1145\/1390156.1390169"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Caruana, R., Niculescu-Mizil, A.: An empirical comparison of supervised learning algorithms. In: Proceedings of the 23rd International Conference on Machine Learning, ICML 2006, pp. 161\u2013168. ACM, New York (2006)","DOI":"10.1145\/1143844.1143865"},{"issue":"4","key":"4_CR18","doi-asserted-by":"publisher","first-page":"61:1","DOI":"10.1145\/2532128","volume":"5","author":"O Chapelle","year":"2014","unstructured":"Chapelle, O., Manavoglu, E., Rosales, R.: Simple and scalable response prediction for display advertising. ACM Trans. Intell. Syst. Technol. 5(4), 61:1\u201361:34 (2014)","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Chen, T., Guestrin, C.: XGBoost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2016, pp. 785\u2013794. ACM, New York (2016)","DOI":"10.1145\/2939672.2939785"},{"key":"4_CR20","unstructured":"Chen, T., et al.: MXNet: a flexible and efficient machine learning library for heterogeneous distributed systems. CoRR, abs\/1512.01274 (2015)"},{"key":"4_CR21","unstructured":"Coleman, C., et al.: DAWNBench: an end-to-end deep learning benchmark and competition. In: ML Systems Workshop @ NIPS 2017, vol. 100, no. 101, p. 102 (2017)"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Das, A.S., Datar, M., Garg, A., Rajaram, S.: Google news personalization: scalable online collaborative filtering. In: Proceedings of the 16th International Conference on World Wide Web, WWW 2007, pp. 271\u2013280. ACM, New York (2007)","DOI":"10.1145\/1242572.1242610"},{"issue":"1","key":"4_CR23","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1145\/1327452.1327492","volume":"51","author":"J Dean","year":"2008","unstructured":"Dean, J., Ghemawat, S.: MapReduce: simplified data processing on large clusters. Commun. ACM 51(1), 107\u2013113 (2008)","journal-title":"Commun. ACM"},{"issue":"10","key":"4_CR24","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1145\/2347736.2347755","volume":"55","author":"P Domingos","year":"2012","unstructured":"Domingos, P.: A few useful things to know about machine learning. Commun. ACM 55(10), 78\u201387 (2012)","journal-title":"Commun. ACM"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Ekanayake, J., et al.: Twister: a runtime for iterative MapReduce. In: Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing, HPDC 2010, pp. 810\u2013818. ACM, New York (2010)","DOI":"10.1145\/1851476.1851593"},{"key":"4_CR26","doi-asserted-by":"publisher","first-page":"1268","DOI":"10.14778\/2350229.2350245","volume":"5","author":"S Ewen","year":"2012","unstructured":"Ewen, S., Tzoumas, K., Kaufmann, M., Markl, V.: Spinning fast iterative data flows. Proc. VLDB Endow. 5, 1268\u20131279 (2012)","journal-title":"Proc. VLDB Endow."},{"key":"4_CR27","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1214\/aos\/1013203451","volume":"29","author":"JH Friedman","year":"2000","unstructured":"Friedman, J.H.: Greedy function approximation: a gradient boosting machine. Ann. Stat. 29, 1189\u20131232 (2000)","journal-title":"Ann. Stat."},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Ghazal, A., et al.: Bigbench: towards an industry standard benchmark for big data analytics. In: Proceedings of the 2013 ACM SIGMOD International Conference on Management of Data, SIGMOD 2013, pp. 1197\u20131208. ACM, New York (2013)","DOI":"10.1145\/2463676.2463712"},{"key":"4_CR29","volume-title":"Deep Learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. The MIT Press, Cambridge (2016)"},{"issue":"5","key":"4_CR30","doi-asserted-by":"publisher","first-page":"532","DOI":"10.1145\/42411.42415","volume":"31","author":"JL Gustafson","year":"1988","unstructured":"Gustafson, J.L.: Reevaluating Amdahl\u2019s law. Commun. ACM 31(5), 532\u2013533 (1988)","journal-title":"Commun. ACM"},{"issue":"2","key":"4_CR31","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1109\/MIS.2009.36","volume":"24","author":"A Halevy","year":"2009","unstructured":"Halevy, A., Norvig, P., Pereira, F.: The unreasonable effectiveness of data. IEEE Intell. Syst. 24(2), 8\u201312 (2009)","journal-title":"IEEE Intell. Syst."},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"He, X., et al.: Practical lessons from predicting clicks on ads at Facebook. In: Proceedings of the Eighth International Workshop on Data Mining for Online Advertising, ADKDD 2014, pp. 5:1\u20135:9. ACM, New York (2014)","DOI":"10.1145\/2648584.2648589"},{"key":"4_CR33","unstructured":"Hoffer, E., Hubara, I., Soudry, D.: Train longer, generalize better: closing the generalization gap in large batch training of neural networks. In: NIPS, pp. 1729\u20131739 (2017)"},{"key":"4_CR34","series-title":"Lecture Notes in Business Information Processing","doi-asserted-by":"publisher","first-page":"209","DOI":"10.1007\/978-3-642-19294-4_9","volume-title":"New Frontiers in Information and Software as Services","author":"S Huang","year":"2011","unstructured":"Huang, S., Huang, J., Dai, J., Xie, T., Huang, B.: The HiBench benchmark suite: characterization of the MapReduce-based data analysis. In: Agrawal, D., Candan, K.S., Li, W.-S. (eds.) New Frontiers in Information and Software as Services. LNBIP, vol. 74, pp. 209\u2013228. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-19294-4_9"},{"issue":"7","key":"4_CR35","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1145\/2611567","volume":"57","author":"HV Jagadish","year":"2014","unstructured":"Jagadish, H.V., et al.: Big data and its technical challenges. Commun. ACM 57(7), 86\u201394 (2014)","journal-title":"Commun. ACM"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Jimmy, L., Kolcz, A.: Large-scale machine learning at Twitter. In: SIGMOD 2012 (2012)","DOI":"10.1007\/978-3-642-30284-8_4"},{"issue":"8","key":"4_CR37","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/MC.2009.263","volume":"42","author":"Y Koren","year":"2009","unstructured":"Koren, Y., Bell, R., Volinsky, C.: Matrix factorization techniques for recommender systems. Computer 42(8), 30\u201337 (2009)","journal-title":"Computer"},{"issue":"7553","key":"4_CR38","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436 (2015)","journal-title":"Nature"},{"key":"4_CR39","doi-asserted-by":"publisher","DOI":"10.2200\/S00274ED1V01Y201006HLT007","volume-title":"Data-Intensive Text Processing with MapReduce","author":"J Lin","year":"2010","unstructured":"Lin, J., Dyer, C.: Data-Intensive Text Processing with MapReduce. Morgan and Claypool Publishers, San Rafael (2010)"},{"key":"4_CR40","doi-asserted-by":"crossref","unstructured":"Ling, X., Deng, W., Gu, C., Zhou, H., Li, C., Sun, F.: Model ensemble for click prediction in Bing search ads. In: Proceedings of the 26th International Conference on World Wide Web Companion, WWW 2017 Companion, pp. 689\u2013698, Republic and Canton of Geneva, Switzerland. International World Wide Web Conferences Steering Committee (2017)","DOI":"10.1145\/3041021.3054192"},{"issue":"8","key":"4_CR41","doi-asserted-by":"publisher","first-page":"716","DOI":"10.14778\/2212351.2212354","volume":"5","author":"Y Low","year":"2012","unstructured":"Low, Y., Bickson, D., Gonzalez, J., Guestrin, C., Kyrola, A., Hellerstein, J.M.: Distributed GraphLab: a framework for machine learning and data mining in the cloud. Proce. VLDB Endow. 5(8), 716\u2013727 (2012)","journal-title":"Proce. VLDB Endow."},{"key":"4_CR42","unstructured":"Low, Y., Gonzalez, J.E., Kyrola, A., Bickson, D., Guestrin, C.E., Hellerstein, J.: GraphLab: a new framework for parallel machine learning. arXiv preprint arXiv:1408.2041 (2014)"},{"key":"4_CR43","first-page":"433","volume":"2016","author":"OC Marcu","year":"2016","unstructured":"Marcu, O.C., Costan, A., Antoniu, G., P\u00e9rez-Hern\u00e9ndez, M.S.: Spark versus flink: understanding performance in big data analytics frameworks. IEEE CLUSTER 2016, 433\u2013442 (2016)","journal-title":"IEEE CLUSTER"},{"key":"4_CR44","doi-asserted-by":"crossref","unstructured":"McMahan, H.B., et al.: Ad click prediction: a view from the trenches. In: KDD 2013. ACM (2013)","DOI":"10.1145\/2487575.2488200"},{"key":"4_CR45","unstructured":"McSherry, F., Isard, M., Murray, D.G.: Scalability! But at what cost? In: USENIX HOTOS 2015. USENIX Association (2015)"},{"issue":"1","key":"4_CR46","first-page":"1235","volume":"17","author":"X Meng","year":"2016","unstructured":"Meng, X., et al.: MLlib: machine learning in Apache spark. J. Mach. Learn. Res. 17(1), 1235\u20131241 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"4_CR47","unstructured":"Ousterhout, K., Rasti, R., Ratnasamy, S., Shenker, S., Chun, B.-G.: Making sense of performance in data analytics frameworks. In: Proceedings of the 12th USENIX Conference on Networked Systems Design and Implementation, NSDI 2015, pp. 293\u2013307. USENIX Association, Berkeley (2015)"},{"key":"4_CR48","doi-asserted-by":"crossref","unstructured":"Richardson, M., Dominowska, E., Ragno, R.: Predicting clicks: estimating the click-through rate for new ads. In: WWW 2007. ACM (2007)","DOI":"10.1145\/1242572.1242643"},{"key":"4_CR49","doi-asserted-by":"crossref","unstructured":"Schelter, S., Boden, C., Schenck, M., Alexandrov, A., Markl, V.: Distributed matrix factorization with MapReduce using a series of broadcast-joins. In: ACM RecSys 2013 (2013)","DOI":"10.1145\/2507157.2507195"},{"issue":"13","key":"4_CR50","doi-asserted-by":"publisher","first-page":"2110","DOI":"10.14778\/2831360.2831365","volume":"8","author":"J Shi","year":"2015","unstructured":"Shi, J., et al.: Clash of the Titans: MapReduce vs. spark for large scale data analytics. Proc. VLDB Endow. 8(13), 2110\u20132121 (2015)","journal-title":"Proc. VLDB Endow."},{"key":"4_CR51","first-page":"424","volume":"2016","author":"J Veiga","year":"2016","unstructured":"Veiga, J., Exp\u00f3sito, R.R., Pardo, X.C., Taboada, G.L., Tourifio, J.: Performance evaluation of big data frameworks for large-scale data analytics. IEEE BigData 2016, 424\u2013431 (2016)","journal-title":"IEEE BigData"},{"key":"4_CR52","doi-asserted-by":"crossref","unstructured":"Weinberger, K., Dasgupta, A., Langford, J., Smola, A., Attenberg, J.: Feature hashing for large scale multitask learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, ICML 2009, pp. 1113\u20131120. ACM, New York (2009)","DOI":"10.1145\/1553374.1553516"},{"key":"4_CR53","unstructured":"Yu, D., et al.: An introduction to computational networks and the computational network toolkit. Microsoft Technical report MSR-TR-2014-112 (2014)"},{"key":"4_CR54","unstructured":"Zaharia, M., et al.: Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In: NSDI 2012 (2012)"},{"key":"4_CR55","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/978-3-540-68880-8_32","volume-title":"Algorithmic Aspects in Information and Management","author":"Y Zhou","year":"2008","unstructured":"Zhou, Y., Wilkinson, D., Schreiber, R., Pan, R.: Large-scale parallel collaborative filtering for the Netflix prize. In: Fleischer, R., Xu, J. (eds.) AAIM 2008. LNCS, vol. 5034, pp. 337\u2013348. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-68880-8_32"},{"key":"4_CR56","doi-asserted-by":"crossref","unstructured":"Zhuang, Y., Chin, W.-S., Juan, Y.-C., Lin, C.-J.: A fast parallel SGD for matrix factorization in shared memory systems. In: Proceedings of the 7th ACM Conference on Recommender Systems, RecSys 2013, pp. 249\u2013256. ACM, New York (2013)","DOI":"10.1145\/2507157.2507164"}],"container-title":["Lecture Notes in Computer Science","Performance Evaluation and Benchmarking for the Era of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-11404-6_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T07:36:58Z","timestamp":1694590618000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-11404-6_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030114039","9783030114046"],"references-count":56,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-11404-6_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"30 January 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TPCTC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Technology Conference on Performance Evaluation and Benchmarking","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rio de Janeiro","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 August 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 August 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tpctc2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.tpc.org\/tpctc\/tpctc2018\/default.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}