{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,29]],"date-time":"2025-08-29T17:10:11Z","timestamp":1756487411282,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":80,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,27]]},"DOI":"10.1145\/3590140.3629112","type":"proceedings-article","created":{"date-parts":[[2023,11,24]],"date-time":"2023-11-24T18:06:33Z","timestamp":1700849193000},"page":"151-164","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Characterizing Distributed Machine Learning Workloads on Apache Spark"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-4391-8021","authenticated-orcid":false,"given":"Yasmine","family":"Djebrouni","sequence":"first","affiliation":[{"name":"University of Grenoble Alps, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7636-9320","authenticated-orcid":false,"given":"Isabelly","family":"Rocha","sequence":"additional","affiliation":[{"name":"University of Neuch\u00e2tel, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0558-0123","authenticated-orcid":false,"given":"Sara","family":"Bouchenak","sequence":"additional","affiliation":[{"name":"INSA Lyon, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4228-6735","authenticated-orcid":false,"given":"Lydia","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Neuch\u00e2tel, Switzerland, TU Delft, Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1574-6721","authenticated-orcid":false,"given":"Pascal","family":"Felber","sequence":"additional","affiliation":[{"name":"University of Neuch\u00e2tel, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7042-0161","authenticated-orcid":false,"given":"Vania","family":"Marangozova","sequence":"additional","affiliation":[{"name":"University of Grenoble Alps, France"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1493-6603","authenticated-orcid":false,"given":"Valerio","family":"Schiavoni","sequence":"additional","affiliation":[{"name":"University of Neuch\u00e2tel, Switzerland"}]}],"member":"320","published-online":{"date-parts":[[2023,11,27]]},"reference":[{"volume-title":"https:\/\/mlperf.org\/. Last accessed","year":"2023","key":"e_1_3_2_1_1_1","unstructured":"MLPerf. https:\/\/mlperf.org\/. Last accessed: Oct 24, 2023."},{"key":"e_1_3_2_1_2_1","volume-title":"http:\/\/qwone.com\/~jason\/20Newsgroups. Last accessed","author":"News","year":"2023","unstructured":"News 20 dataset. http:\/\/qwone.com\/~jason\/20Newsgroups. Last accessed: Oct 24, 2023."},{"key":"e_1_3_2_1_3_1","unstructured":"Sparkmeasure a tool for performance troubleshooting of apache spark workloads. https:\/\/db-blog.web.cern.ch\/blog\/luca-canali\/2018-08-sparkmeasure-tool-performance-troubleshooting-apache-spark-workloads. Last accessed: Oct 24 2023."},{"key":"e_1_3_2_1_4_1","volume-title":"http:\/\/archive.ics.uci.edu\/ml. Last accessed","author":"Machine Learning Repository UCI","year":"2023","unstructured":"UCI Machine Learning Repository. http:\/\/archive.ics.uci.edu\/ml. Last accessed: Oct 24, 2023."},{"volume-title":"https:\/\/www.kaggle.com\/datasets","year":"2021","key":"e_1_3_2_1_5_1","unstructured":"Kaggle. https:\/\/www.kaggle.com\/datasets, 2021."},{"key":"e_1_3_2_1_6_1","volume-title":"May","author":"Workload Characterization Git Repository DML","year":"2023","unstructured":"DML Workload Characterization Git Repository. https:\/\/github.com\/DMLCharacterization\/DMLCharacterization\/, May 2023."},{"key":"e_1_3_2_1_7_1","first-page":"265","volume-title":"Xiaoqiang Zheng. TensorFlow: A System for Large-scale Machine Learning. In Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation, Osdi'16","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek G. Murray, Benoit Steiner, Paul Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. TensorFlow: A System for Large-scale Machine Learning. In Proceedings of the 12th USENIX Conference on Operating Systems Design and Implementation, Osdi'16, pages 265--283, Berkeley, CA, USA, 2016. USENIX Association."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2013.04.015"},{"key":"e_1_3_2_1_9_1","first-page":"736","volume-title":"Alterkawi and Matteo Migliavacca. Parallelism and Partitioning in Large-Scale GAs Using Spark. In Proceedings of the Genetic and Evolutionary Computation Conference, GECCO '19","author":"Laila","year":"2019","unstructured":"Laila Alterkawi and Matteo Migliavacca. Parallelism and Partitioning in Large-Scale GAs Using Spark. In Proceedings of the Genetic and Evolutionary Computation Conference, GECCO '19, pages 736--744, New York, NY, USA, 2019. Association for Computing Machinery."},{"key":"e_1_3_2_1_10_1","volume-title":"https:\/\/spark.apache.org\/docs\/2.4.3\/configuration.html. Last accessed","author":"Spark Apache","year":"2023","unstructured":"Apache Spark. Spark Configuration. https:\/\/spark.apache.org\/docs\/2.4.3\/configuration.html. Last accessed: Oct 24, 2023."},{"key":"e_1_3_2_1_11_1","volume-title":"https:\/\/aws.amazon.com\/fr\/ec2\/pricing\/on-demand\/. Last accessed","author":"On-Demand Pricing AWS.","year":"2023","unstructured":"AWS. Amazon EC2 On-Demand Pricing. https:\/\/aws.amazon.com\/fr\/ec2\/pricing\/on-demand\/. Last accessed: Oct 24, 2023."},{"key":"e_1_3_2_1_12_1","volume-title":"July","author":"Baldi Pierre","year":"2014","unstructured":"Pierre Baldi, Peter Sadowski, and Daniel Whiteson. Searching for Exotic Particles in High-Energy Physics with Deep Learning. Nature Communications, 5(C), July 2014."},{"key":"e_1_3_2_1_13_1","volume-title":"Azure Machine Learning. Microsoft Azure Essentials","author":"Barnes Jeff","year":"2015","unstructured":"Jeff Barnes. Azure Machine Learning. Microsoft Azure Essentials. 1st ed, Microsoft, 2015.","edition":"1"},{"key":"e_1_3_2_1_14_1","volume-title":"Workload Characterization: A Survey Revisited. ACM Computing Surveys (CSUR), 48(3):1--43","author":"Calzarossa Maria Carla","year":"2016","unstructured":"Maria Carla Calzarossa, Luisa Massari, and Daniele Tessera. Workload Characterization: A Survey Revisited. ACM Computing Surveys (CSUR), 48(3):1--43, 2016."},{"key":"e_1_3_2_1_15_1","volume-title":"Proc. of Machine Learning and Systems 2020","author":"Chen Beidi","year":"2020","unstructured":"Beidi Chen, Tharun Medini, James Farwell, Sameh Gobriel, Tsung-Yuan Charlie Tai, and Anshumali Shrivastava. SLIDE: In Defense of Smart Algorithms over Hardware Acceleration for Large-Scale Deep Learning Systems. In Inderjit S. Dhillon, Dimitris S. Papailiopoulos, and Vivienne Sze, editors, Proc. of Machine Learning and Systems 2020, MLSys 2020, Austin, TX, USA, March 2-4, 2020. mlsys.org, 2020."},{"key":"e_1_3_2_1_16_1","volume-title":"Astro: Auto-Generation of Synthetic Traces Using Scaling Pattern Recognition for MPI Workloads","author":"Chen Jian","year":"2017","unstructured":"Jian Chen and Russell M. Clapp. Astro: Auto-Generation of Synthetic Traces Using Scaling Pattern Recognition for MPI Workloads. IEEE Transactions on Parallel and Distributed Systems, 28(8):2159--2171, 2017."},{"key":"e_1_3_2_1_17_1","first-page":"874","volume-title":"Proceedings of the 34th International Conference on Machine Learning, volume 70 of Proceedings of Machine Learning Research","author":"Cortes Corinna","year":"2017","unstructured":"Corinna Cortes, Xavier Gonzalvo, Vitaly Kuznetsov, Mehryar Mohri, and Scott Yang. AdaNet: Adaptive Structural Learning of Artificial Neural Networks. In Doina Precup and Yee Whye Teh, editors, Proceedings of the 34th International Conference on Machine Learning, volume 70 of Proceedings of Machine Learning Research, pages 874--883, 06--11 Aug 2017."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357223.3362707"},{"key":"e_1_3_2_1_19_1","first-page":"53","volume-title":"Predicting Positive","author":"Danziger Samuel","year":"2009","unstructured":"Samuel Danziger, Roberta Baronio, Lydia Ho, Linda Hall, Kirsty Salmon, G. Hatfield, Peter Kaiser, and Richard Lathrop. Predicting Positive p53 Cancer Rescue Regions Using Most Informative Positive (MIP) Active Learning. PLoS computational biology, 5, 09 2009."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2211477"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1561\/9781601988157"},{"key":"e_1_3_2_1_22_1","volume-title":"August","author":"Diaz-Chito Katerine","year":"2016","unstructured":"Katerine Diaz-Chito, Aura Hern\u00e1ndez-Sabat\u00e9, and Antonio M. L\u00f3pez. A Reduced Feature Set for Driver Head Pose Estimation. Appl. Soft Comput., 45(C):98--107, August 2016."},{"key":"e_1_3_2_1_23_1","volume-title":"Cluster Computing","author":"Elshawi Radwa","year":"2021","unstructured":"Radwa Elshawi, Abdul Wahab, Ahmed Barnawi, and Sherif Sakr. DLBench: A Comprehensive Experimental Evaluation of Deep Learning Frameworks. Cluster Computing, February 2021."},{"key":"e_1_3_2_1_24_1","volume-title":"The Next Generation. CoRR, abs\/2007.04074","author":"Feurer Matthias","year":"2020","unstructured":"Matthias Feurer, Katharina Eggensperger, Stefan Falkner, Marius Lindauer, and Frank Hutter. Auto-Sklearn 2.0: The Next Generation. CoRR, abs\/2007.04074, 2020."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.04.014"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1504\/IJCSE.2015.071879"},{"key":"e_1_3_2_1_27_1","first-page":"2094","volume-title":"Galdino. WGCap: A Synthetic Trace Generation Tool for Capacity Planning of Virtual Server Environments. In 2010 IEEE International Conference on Systems, Man and Cybernetics","author":"Galindo Hugo E. S.","year":"2010","unstructured":"Hugo E. S. Galindo, Erico A. C. Guedes, Paulo R. M. Maciel, Bruno Silva, and Sergio M. L. Galdino. WGCap: A Synthetic Trace Generation Tool for Capacity Planning of Virtual Server Environments. In 2010 IEEE International Conference on Systems, Man and Cybernetics, pages 2094--2101, 2010."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/S1352-2310(97)00447-0"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-9473(02)00257-8"},{"key":"e_1_3_2_1_30_1","volume-title":"April","author":"Herodotou Herodotos","year":"2020","unstructured":"Herodotos Herodotou, Yuxing Chen, and Jiaheng Lu. A Survey on Automatic Parameter Tuning for Big Data Processing Systems. ACM Comput. Surv., 53(2), April 2020."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-021-01605-0"},{"key":"e_1_3_2_1_33_1","volume-title":"Machine Learning with AWS: Explore The Power of Cloud Services for Your Machine Learning and Artificial Intelligence Projects","author":"Jackovich Jeffrey","year":"2018","unstructured":"Jeffrey Jackovich and Ruze Richards. Machine Learning with AWS: Explore The Power of Cloud Services for Your Machine Learning and Artificial Intelligence Projects. Packt Publishing Ltd, 2018."},{"key":"e_1_3_2_1_34_1","first-page":"947","volume-title":"Fan Yang. Analysis of Large-Scale Multi-Tenant GPU Clusters for DNN Training Workloads. In 2019 USENIX Annual Technical Conference (USENIX ATC 19)","author":"Jeon Myeongjae","year":"2019","unstructured":"Myeongjae Jeon, Shivaram Venkataraman, Amar Phanishayee, Junjie Qian, Wencong Xiao, and Fan Yang. Analysis of Large-Scale Multi-Tenant GPU Clusters for DNN Training Workloads. In 2019 USENIX Annual Technical Conference (USENIX ATC 19), pages 947--960, Renton, WA, July 2019. USENIX Association."},{"key":"e_1_3_2_1_35_1","first-page":"675","volume-title":"Trevor Darrell. Caffe: Convolutional Architecture for Fast Feature Embedding. In Proceedings of the 22nd ACM international conference on Multimedia","author":"Jia Yangqing","year":"2014","unstructured":"Yangqing Jia, Evan Shelhamer, Jeff Donahue, Sergey Karayev, Jonathan Long, Ross Girshick, Sergio Guadarrama, and Trevor Darrell. Caffe: Convolutional Architecture for Fast Feature Embedding. In Proceedings of the 22nd ACM international conference on Multimedia, pages 675--678, 2014."},{"key":"e_1_3_2_1_36_1","first-page":"1946","volume-title":"Xia Hu. Auto-Keras: An Efficient Neural Architecture Search System. In Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining","author":"Jin Haifeng","year":"2019","unstructured":"Haifeng Jin, Qingquan Song, and Xia Hu. Auto-Keras: An Efficient Neural Architecture Search System. In Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pages 1946--1956, 2019."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1980.1102314"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCC50000.2020.9219577"},{"key":"e_1_3_2_1_39_1","volume-title":"Machine Learning Applications in Cancer Prognosis and Prediction. Computational and structural biotechnology journal, 13:8--17","author":"Kourou Konstantina","year":"2015","unstructured":"Konstantina Kourou, Themis P. Exarchos, Konstantinos P. Exarchos, Michalis V. Karamouzis, and Dimitrios I Fotiadis. Machine Learning Applications in Cancer Prognosis and Prediction. Computational and structural biotechnology journal, 13:8--17, 2015."},{"key":"e_1_3_2_1_40_1","volume-title":"Valentina Salapura. SparkBench: A Comprehensive Benchmarking Suite for in Memory Data Analytic Platform Spark. In Proceedings of the 12th ACM International Conference on Computing Frontiers, CF '15","author":"Li Min","year":"2015","unstructured":"Min Li, Jian Tan, Yandong Wang, Li Zhang, and Valentina Salapura. SparkBench: A Comprehensive Benchmarking Suite for in Memory Data Analytic Platform Spark. In Proceedings of the 12th ACM International Conference on Computing Frontiers, CF '15, New York, NY, USA, 2015. Association for Computing Machinery."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2023.3246032"},{"key":"e_1_3_2_1_42_1","volume-title":"Random Search, Genetic Algorithm: A Big Comparison for NAS. arXiv preprint arXiv:1912.06059","author":"Liashchynskyi Petro","year":"2019","unstructured":"Petro Liashchynskyi and Pavlo Liashchynskyi. Grid Search, Random Search, Genetic Algorithm: A Big Comparison for NAS. arXiv preprint arXiv:1912.06059, 2019."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0031-3203(02)00060-2"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.07.006"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2015.08.016"},{"key":"e_1_3_2_1_46_1","volume-title":"Evaluating Accuracy (or Error) Measures. Fontainebleau edition","author":"Makridakis S. G.","year":"1995","unstructured":"S. G. Makridakis and M. Hibon. Evaluating Accuracy (or Error) Measures. Fontainebleau edition, 1995."},{"key":"e_1_3_2_1_47_1","first-page":"433","volume-title":"Spark Versus Flink: Understanding Performance in Big Data Analytics Frameworks. In 2016 IEEE International Conference on Cluster Computing(CLUSTER)","author":"Marcu O.","year":"2016","unstructured":"O. Marcu, A. Costan, G. Antoniu, and M. S. Perez-Hernandez. Spark Versus Flink: Understanding Performance in Big Data Analytics Frameworks. In 2016 IEEE International Conference on Cluster Computing(CLUSTER), pages 433--442, 2016."},{"key":"e_1_3_2_1_48_1","first-page":"28","volume-title":"Kristie Seymorey. Building Domain-Specific Search Engines with Machine Learning Techniques. In Proceedings of the AAAI Spring Symposium on Intelligent Agents in Cyberspace. Citeseer","author":"McCallumzy Andrew","year":"1999","unstructured":"Andrew McCallumzy, Kamal Nigamy, Jason Renniey, and Kristie Seymorey. Building Domain-Specific Search Engines with Machine Learning Techniques. In Proceedings of the AAAI Spring Symposium on Intelligent Agents in Cyberspace. Citeseer, pages 28--39. Citeseer, 1999."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.5555\/2946645.2946679"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3071762"},{"key":"e_1_3_2_1_51_1","volume-title":"A Survey of Methods for Analyzing and Improving GPU Energy Efficiency. ACM Comput. Surv., 47(2), aug","author":"Mittal Sparsh","year":"2014","unstructured":"Sparsh Mittal and Jeffrey S. Vetter. A Survey of Methods for Analyzing and Improving GPU Energy Efficiency. ACM Comput. Surv., 47(2), aug 2014."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-020-03328-5"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.2307\/2344614"},{"key":"e_1_3_2_1_54_1","first-page":"417","volume-title":"Kewen Wang. Towards Automatic Tuning of Apache Spark Configuration. In 2018 IEEE 11th International Conference on Cloud Computing (CLOUD)","author":"Nguyen Nhan","year":"2018","unstructured":"Nhan Nguyen, Mohammad Maifi Hasan Khan, and Kewen Wang. Towards Automatic Tuning of Apache Spark Configuration. In 2018 IEEE 11th International Conference on Cloud Computing (CLOUD), pages 417--425, 2018."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00022"},{"key":"e_1_3_2_1_56_1","first-page":"5","volume-title":"Hyuk-Yoon Kwon. Trade-Off Analysis Between Parallelism and Accuracy of SLIC on Apache Spark. In 2021 IEEE International Conference on Big Data and Smart Computing (BigComp)","author":"Park Gang-Min","year":"2021","unstructured":"Gang-Min Park, Yong Seok Heo, and Hyuk-Yoon Kwon. Trade-Off Analysis Between Parallelism and Accuracy of SLIC on Apache Spark. In 2021 IEEE International Conference on Big Data and Smart Computing (BigComp), pages 5--12, 2021."},{"key":"e_1_3_2_1_57_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett, editors, Advances in Neural Information Processing Systems, volume 32. Curran Associates, Inc., 2019."},{"key":"e_1_3_2_1_58_1","volume-title":"https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/articles\/intel-performance-counter-monitor.html","author":"Processor Counter PCM.","year":"2022","unstructured":"PCM. Processor Counter Monitor (PCM). https:\/\/software.intel.com\/content\/www\/us\/en\/develop\/articles\/intel-performance-counter-monitor.html, 2022. Last accessed: Oct 24, 2023."},{"key":"e_1_3_2_1_59_1","first-page":"75","volume-title":"Sandro Rigo. Empirical Web Server Power Modeling and Characterization. In 2011 IEEE International Symposium on Workload Characterization (IISWC)","author":"Piga Leonardo","year":"2011","unstructured":"Leonardo Piga, Reinaldo Bergamaschi, Felipe Klein, Rodolfo Azevedo, and Sandro Rigo. Empirical Web Server Power Modeling and Characterization. In 2011 IEEE International Symposium on Workload Characterization (IISWC), pages 75--75, 2011."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1301"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022643204877"},{"key":"e_1_3_2_1_62_1","volume-title":"Fine-Tuning Deep Convolutional Networks for Plant Recognition. CLEF (Working Notes), 1391:467--475","author":"Reyes Angie K","year":"2015","unstructured":"Angie K Reyes, Juan C Caicedo, and Jorge E Camargo. Fine-Tuning Deep Convolutional Networks for Plant Recognition. CLEF (Working Notes), 1391:467--475, 2015."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.4630300621"},{"key":"e_1_3_2_1_64_1","first-page":"41","volume-title":"An Empirical Study of The Naive Bayes Classifier. In IJCAI 2001 workshop on empirical methods in artificial intelligence","volume":"3","author":"Irina","year":"2001","unstructured":"Irina Rish et al. An Empirical Study of The Naive Bayes Classifier. In IJCAI 2001 workshop on empirical methods in artificial intelligence, volume 3, pages 41--46, 2001."},{"key":"e_1_3_2_1_65_1","first-page":"89","volume-title":"Valerio Schiavoni. PipeTune: Pipeline Parallelism of Hyper and System Parameters Tuning for Deep Learning Clusters. In Proceedings of the 21st International Middleware Conference, Middleware '20","author":"Rocha Isabelly","year":"2020","unstructured":"Isabelly Rocha, Nathaniel Morris, Lydia Y. Chen, Pascal Felber, Robert Birke, and Valerio Schiavoni. PipeTune: Pipeline Parallelism of Hyper and System Parameters Tuning for Deep Learning Clusters. In Proceedings of the 21st International Middleware Conference, Middleware '20, pages 89--104, New York, NY, USA, 2020. Association for Computing Machinery."},{"key":"e_1_3_2_1_66_1","first-page":"8614","volume-title":"Bhuvana Ramabhadran. Deep Convolutional Neural Networks for LVCSR. In 2013 IEEE international conference on acoustics, speech and signal processing","author":"Sainath Tara N","year":"2013","unstructured":"Tara N Sainath, Abdel-rahman Mohamed, Brian Kingsbury, and Bhuvana Ramabhadran. Deep Convolutional Neural Networks for LVCSR. In 2013 IEEE international conference on acoustics, speech and signal processing, pages 8614--8618. IEEE, 2013."},{"key":"e_1_3_2_1_67_1","first-page":"1","volume-title":"Robert Chansler. The Hadoop Distributed File System. In 2010 IEEE 26th symposium on mass storage systems and technologies (MSST)","author":"Shvachko Konstantin","year":"2010","unstructured":"Konstantin Shvachko, Hairong Kuang, Sanjay Radia, and Robert Chansler. The Hadoop Distributed File System. In 2010 IEEE 26th symposium on mass storage systems and technologies (MSST), pages 1--10. Ieee, 2010."},{"key":"e_1_3_2_1_68_1","volume-title":"Hadoop Commands Guide. https:\/\/hadoop.apache.org\/docs\/r1.2.1\/cluster_setup.html#Configuration. Last accessed","author":"Software Foundation The Apache","year":"2023","unstructured":"The Apache Software Foundation. Hadoop Commands Guide. https:\/\/hadoop.apache.org\/docs\/r1.2.1\/cluster_setup.html#Configuration. Last accessed: Oct 24, 2023."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.snb.2012.01.074"},{"key":"e_1_3_2_1_70_1","first-page":"189","volume-title":"Yangqing Jia. Characterizing Deep Learning Training Workloads on Alibaba-PAI. In IEEE International Symposium on Workload Characterization, IISWC 2019","author":"Wang Mengdi","year":"2019","unstructured":"Mengdi Wang, Chen Meng, Guoping Long, Chuan Wu, Jun Yang, Wei Lin, and Yangqing Jia. Characterizing Deep Learning Training Workloads on Alibaba-PAI. In IEEE International Symposium on Workload Characterization, IISWC 2019, Orlando, FL, USA, November 3-5, 2019, pages 189--202. IEEE, 2019."},{"key":"e_1_3_2_1_71_1","first-page":"945","volume-title":"Yu Ding. MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22)","author":"Weng Qizhen","year":"2022","unstructured":"Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding. MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters. In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI 22), pages 945--960, Renton, WA, 2022. USENIX Association."},{"key":"e_1_3_2_1_72_1","volume-title":"Principal Component Analysis. Chemometrics and intelligent laboratory systems, 2(1-3):37--52","author":"Wold Svante","year":"1987","unstructured":"Svante Wold, Kim Esbensen, and Paul Geladi. Principal Component Analysis. Chemometrics and intelligent laboratory systems, 2(1-3):37--52, 1987."},{"key":"e_1_3_2_1_73_1","volume-title":"Fashion-MNIST: A Novel Image Dataset for Benchmarking Machine Learning Algorithms. arXiv preprint arXiv:1708.07747","author":"Xiao Han","year":"2017","unstructured":"Han Xiao, Kashif Rasul, and Roland Vollgraf. Fashion-MNIST: A Novel Image Dataset for Benchmarking Machine Learning Algorithms. arXiv preprint arXiv:1708.07747, 2017."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/2463676.2465288"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD56317.2022.00103"},{"key":"e_1_3_2_1_76_1","volume-title":"International Journal of computer science and information technologies, 1(2):121--125","author":"Yedla Madhu","year":"2010","unstructured":"Madhu Yedla, Srinivasa Rao Pathakota, and TM Srinivasa. Enhancing K-Means Clustering Algorithm with Improved Initial Center. International Journal of computer science and information technologies, 1(2):121--125, 2010."},{"key":"e_1_3_2_1_77_1","first-page":"15","volume-title":"Presented as part of the 9th USENIX Symposium on Networked Systems Design and Implementation (NSDI 12)","author":"Zaharia Matei","year":"2012","unstructured":"Matei Zaharia, Mosharaf Chowdhury, Tathagata Das, Ankur Dave, Justin Ma, Murphy McCauly, Michael J Franklin, Scott Shenker, and Ion Stoica. Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing. In Presented as part of the 9th USENIX Symposium on Networked Systems Design and Implementation (NSDI 12), pages 15--28, 2012."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/2934664"},{"key":"e_1_3_2_1_79_1","volume-title":"Gennady Pekhimenko. Benchmarking and Analyzing Deep Neural Network Training. In IEEE International Symposium on Workload Characterization (IISWC'18)","author":"Zhu Hongyu","year":"2018","unstructured":"Hongyu Zhu, Mohamed Akrout, Bojian Zheng, AndrewPelegris, Anand Jayarajan, Amar Phanishayee, Bianca Schroeder, and Gennady Pekhimenko. Benchmarking and Analyzing Deep Neural Network Training. In IEEE International Symposium on Workload Characterization (IISWC'18), North Carolina, October 2018."},{"key":"e_1_3_2_1_80_1","first-page":"135","volume-title":"Kaiyuan Shen. Logistic Regression Model Optimization and Case Analysis. In 2019 IEEE 7th International Conference on Computer Science and Network Technology (ICCSNT)","author":"Zou Xiaonan","year":"2019","unstructured":"Xiaonan Zou, Yong Hu, Zhewen Tian, and Kaiyuan Shen. Logistic Regression Model Optimization and Case Analysis. In 2019 IEEE 7th International Conference on Computer Science and Network Technology (ICCSNT), pages 135--139. IEEE, 2019."}],"event":{"name":"Middleware '23: 24th International Middleware Conference","sponsor":["ACM Association for Computing Machinery","IFIP International Federation for Information Processing"],"location":"Bologna Italy","acronym":"Middleware '23"},"container-title":["Proceedings of the 24th International Middleware Conference on ZZZ"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3590140.3629112","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3590140.3629112","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,29]],"date-time":"2025-08-29T16:52:01Z","timestamp":1756486321000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3590140.3629112"}},"subtitle":["(Experimentation and Deployment Paper)"],"short-title":[],"issued":{"date-parts":[[2023,11,27]]},"references-count":80,"alternative-id":["10.1145\/3590140.3629112","10.1145\/3590140"],"URL":"https:\/\/doi.org\/10.1145\/3590140.3629112","relation":{},"subject":[],"published":{"date-parts":[[2023,11,27]]},"assertion":[{"value":"2023-11-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}