{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:46:15Z","timestamp":1767339975856,"version":"3.40.3"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031744297"},{"type":"electronic","value":"9783031744303"}],"license":[{"start":{"date-parts":[[2024,12,21]],"date-time":"2024-12-21T00:00:00Z","timestamp":1734739200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,21]],"date-time":"2024-12-21T00:00:00Z","timestamp":1734739200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-74430-3_2","type":"book-chapter","created":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T07:44:02Z","timestamp":1734680642000},"page":"20-39","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["An Empirical Study of\u00a0Machine Learning-Based Synthetic Job Trace Generation Methods"],"prefix":"10.1007","author":[{"given":"Monish Soundar","family":"Raj","sequence":"first","affiliation":[]},{"given":"Thomas","family":"MacDougall","sequence":"additional","affiliation":[]},{"given":"Di","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Dong","family":"Dai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,21]]},"reference":[{"key":"2_CR1","unstructured":"Amvrosiadis, G., Park, J.W., Ganger, G.R., Gibson, G.A., Baseman, E., DeBardeleben, N.: On the diversity of cluster workloads and its impact on research results. In: 2018 USENIX Annual Technical Conference USENIX ATC 18), pp. 533\u2013546 (2018)"},{"key":"2_CR2","unstructured":"Argonne Leadership Computing Facility: Anl-alcf-djc-theta dataset [20230101_20231231] (2023). https:\/\/reports.alcf.anl.gov\/data\/ANL-ALCF-DJC-THETA_20230101_20231231.html"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Bau, D., et al: Seeing what a GAN cannot generate. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4502\u20134511 (2019)","DOI":"10.1109\/ICCV.2019.00460"},{"key":"2_CR4","unstructured":"Collins, J.: Passing the torch from intrepid to Mira (2023). https:\/\/www.alcf.anl.gov\/news\/passing-torch-intrepid-mira"},{"key":"2_CR5","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR6","doi-asserted-by":"crossref","unstructured":"Di, S., Guo, H., Pershey, E., Snir, M., Cappello, F.: Characterizing and understanding HPC job failures over the 2K-day life of IBM BlueGene\/Q system. In: 2019 49th Annual IEEE\/IFIP International Conference on Dependable Systems and Networks (DSN), pp. 473\u2013484. IEEE (2019)","DOI":"10.1109\/DSN.2019.00055"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Feitelson, D.G.: Packing schemes for gang scheduling. In: Workshop on Job Scheduling Strategies for Parallel Processing, pp. 89\u2013110. Springer (1996)","DOI":"10.1007\/BFb0022289"},{"key":"2_CR8","unstructured":"Goodfellow, I., et al.: Generative adversarial NETs. In: Ghahramani, Z., Welling, M., Cortes, C., Lawrence, N., Weinberger, K. (eds.) Advances in Neural Information Processing Systems, vol.\u00a027. Curran Associates, Inc. (2014)"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Gu, D., et al.: Elasticflow: an elastic serverless training platform for distributed deep learning. In: Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, vol. 2, pp. 266\u2013280 (2023)","DOI":"10.1145\/3575693.3575721"},{"key":"2_CR10","first-page":"27953","volume":"35","author":"W Harvey","year":"2022","unstructured":"Harvey, W., Naderiparizi, S., Masrani, V., Weilbach, C., Wood, F.: Flexible diffusion modeling of long videos. Adv. Neural. Inf. Process. Syst. 35, 27953\u201327965 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Hu, Q., Sun, P., Yan, S., Wen, Y., Zhang, T.: Characterization and prediction of deep learning workloads in large-scale GPU datacenters. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201315 (2021)","DOI":"10.1145\/3458817.3476223"},{"key":"2_CR12","unstructured":"Huang, H., Yu, P., Wang, C.: An introduction to image synthesis with generative adversarial NETs. arxiv preprint arXiv:1803.04469 (2018)"},{"key":"2_CR13","doi-asserted-by":"publisher","first-page":"28416","DOI":"10.1109\/ACCESS.2023.3259236","volume":"11","author":"A Islam","year":"2023","unstructured":"Islam, A., Belhaouari, S.B.: Fast and efficient image generation using variational autoencoders and k-nearest neighbor oversampling approach. IEEE Access 11, 28416\u201328426 (2023)","journal-title":"IEEE Access"},{"key":"2_CR14","unstructured":"Jeon, M., Venkataraman, S., Phanishayee, A., Qian, J., Xiao, W., Yang, F.:Analysis of large-scale multi-tenant GPU clusters for DNN training workloads. In: 2019 USENIX Annual Technical Conference (USENIX ATC 19), pp. 947\u2013960 (2019)"},{"key":"2_CR15","unstructured":"Jette, M.: Improved utilization and responsiveness with gang scheduling, Technical report, Lawrence Livermore National Lab.(LLNL), Livermore, CA, United States (1996)"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Jia, W., et al.: Pushing the limit of molecular dynamics with Ab initio accuracy to 100 million atoms with machine learning. In: SC20: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201314. IEEE (2020)","DOI":"10.1109\/SC41405.2020.00009"},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Joubert, W., Su, S.Q.: An analysis of computational workloads for the ORNL Jaguar system. In: Proceedings of the 26th ACM International Conference on Supercomputing, pp. 247\u2013256 (2012)","DOI":"10.1145\/2304576.2304611"},{"key":"2_CR18","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational Bayes (2022)"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Kurth, T., et\u00a0al.: Exascale deep learning for climate analytics. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 649\u2013660. IEEE (2018)","DOI":"10.1109\/SC.2018.00054"},{"key":"2_CR20","unstructured":"Lee, J., Lee, O.: CTGAN vs TGAN? Which one is more suitable for generating synthetic EEG data. J. Theoret. Appl. Inf. Technol. 99(10) (2021)"},{"issue":"11","key":"2_CR21","doi-asserted-by":"publisher","first-page":"1105","DOI":"10.1016\/S0743-7315(03)00108-4","volume":"63","author":"U Lublin","year":"2003","unstructured":"Lublin, U., Feitelson, D.G.: The workload on parallel supercomputers: modeling the characteristics of rigid jobs. J. Parallel Distrib. Comput. 63(11), 1105\u20131122 (2003)","journal-title":"J. Parallel Distrib. Comput."},{"key":"2_CR22","unstructured":"National Center for Supercomputing Applications: Data sets - blue waters (2023). https:\/\/bluewaters.ncsa.illinois.edu\/data-sets"},{"key":"2_CR23","unstructured":"NCSA: Blue waters data sets (2023). https:\/\/bluewaters.ncsa.illinois.edu\/data-sets"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Patel, T., Liu, Z., Kettimuthu, R., Rich, P., Allcock, W., Tiwari, D.: Job characteristics on large-scale systems: long-term analysis, quantification, and implications. In: SC20: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201317. IEEE (2020)","DOI":"10.1109\/SC41405.2020.00088"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Patki, N., Wedge, R., Veeramachaneni, K.: The synthetic data vault. In: 2016 IEEE International Conference on Data Science and Advanced Analytics (DSAA), pp. 399\u2013410. IEEE (2016)","DOI":"10.1109\/DSAA.2016.49"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Paul, A.K., Choi, J.Y., Karimi, A.M., Wang, F.: Machine learning assisted HPC workload trace generation for leadership scale storage systems. In: Proceedings of the 31st International Symposium on High-Performance Parallel and Distributed Computing, pp. 199\u2013212 (2022)","DOI":"10.1145\/3502181.3531457"},{"key":"2_CR27","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1016\/j.jpdc.2017.09.002","volume":"111","author":"GP Rodrigo","year":"2018","unstructured":"Rodrigo, G.P., \u00d6stberg, P.O., Elmroth, E., Antypas, K., Gerber, R., Ramakrishnan, L.: Towards understanding HPC users and systems: a NERSC case study. J. Parallel Distrib. Comput. 111, 206\u2013221 (2018)","journal-title":"J. Parallel Distrib. Comput."},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Rodrigo\u00a0\u00c1lvarez, G.P., \u00d6stberg, P.O., Elmroth, E., Antypas, K., Gerber, R., Ramakrishnan, L.: HPC system lifetime story: workload characterization and evolutionary analyses on NERSC systems. In: Proceedings of the 24th International Symposium on High-Performance Parallel and Distributed Computing, pp. 57\u201360 (2015)","DOI":"10.1145\/2749246.2749270"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Samsi, S., et\u00a0al.: The MIT Supercloud dataset. In: 2021 IEEE High Performance Extreme Computing Conference (HPEC), pp.\u00a01\u20138. IEEE (2021)","DOI":"10.1109\/HPEC49654.2021.9622850"},{"key":"2_CR30","unstructured":"SDV-DEV: SDV: synthetic data vault (2023). https:\/\/github.com\/sdv-dev\/SDV"},{"key":"2_CR31","unstructured":"SDV Developers: Copulagan synthesizer \u2013 SDV docs (2023). https:\/\/docs.sdv.dev\/sdv\/single-table-data\/modeling\/synthesizers\/copulagansynthesizer"},{"key":"2_CR32","unstructured":"Synthetic Data Vault (SDV) Development Team: Copulagan synthesizer - SDV documentation (2023). https:\/\/docs.sdv.dev\/sdv\/single-table-data\/modeling\/synthesizers\/copulagansynthesizer"},{"key":"2_CR33","unstructured":"Theta: Theta (2023). https:\/\/reports.alcf.anl.gov\/data\/theta.html"},{"key":"2_CR34","unstructured":"ThetaGPU: ThetaGPU (2023). https:\/\/reports.alcf.anl.gov\/data\/thetagpu.html"},{"issue":"9","key":"2_CR35","doi-asserted-by":"publisher","first-page":"2170","DOI":"10.1109\/TPDS.2020.2984821","volume":"31","author":"L Versluis","year":"2020","unstructured":"Versluis, L., et al.: The workflow trace archive: open-access data from public and private computing infrastructures. IEEE Trans. Parallel Distrib. Syst. 31(9), 2170\u20132184 (2020)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"2_CR36","doi-asserted-by":"crossref","unstructured":"Wang, F., Oral, S., Sen, S., Imam, N.: Learning from five-year resource-utilization data of titan system. In: 2019 IEEE International Conference on Cluster Computing (CLUSTER), pp.\u00a01\u20136. IEEE (2019)","DOI":"10.1109\/CLUSTER.2019.8891001"},{"key":"2_CR37","unstructured":"Weng, Q., et al.: Beware of fragmentation: scheduling GPU-sharing workloads with fragmentation gradient descent. In: 2023 USENIX Annual Technical Conference (USENIX ATC 23), pp. 995\u20131008 (2023)"},{"key":"2_CR38","unstructured":"Xu, L., Skoularidou, M., Cuesta-Infante, A., Veeramachaneni, K.: Modeling tabular data using conditional GAN. In: Wallach, H., Larochelle, H., Beygelzimer, A., d\u2019Alch\u00e9-Buc, F., Fox, E., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a032. Curran Associates, Inc. (2019)"},{"key":"2_CR39","unstructured":"Xu, L., Skoularidou, M., Cuesta-Infante, A., Veeramachaneni, K.: Modeling tabular data using conditional GAN. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"2_CR40","unstructured":"YDataAI: YData synthetic: synthetic data generation (2023). https:\/\/github.com\/ydataai\/ydata-synthetic"},{"key":"2_CR41","doi-asserted-by":"crossref","unstructured":"Zhang, D., Dai, D., He, Y., Bao, F.S., Xie, B.: RLScheduler: an automated HPC batch job scheduler using reinforcement learning. In: SC20: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201315. IEEE (2020)","DOI":"10.1109\/SC41405.2020.00035"},{"key":"2_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, D., Dai, D., Xie, B.: SchedInspector: a batch job scheduling inspector using reinforcement learning. In: Proceedings of the 31st International Symposium on High-Performance Parallel and Distributed Computing, pp. 97\u2013109. HPDC \u201922, Association for Computing Machinery, New York, NY, USA (2022)","DOI":"10.1145\/3502181.3531470"},{"key":"2_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, D., Soundar\u00a0Raj, M., Xie, B., Di, S., Dai, D.: Cross-system analysis of job characterization and scheduling in large-scale computing clusters. In: 2024 IEEE International Parallel and Distributed Processing Symposium (IPDPS). IEEE (2024)","DOI":"10.1109\/IPDPS57955.2024.00069"}],"container-title":["Lecture Notes in Computer Science","Job Scheduling Strategies for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-74430-3_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T08:02:58Z","timestamp":1734681778000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-74430-3_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,21]]},"ISBN":["9783031744297","9783031744303"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-74430-3_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,21]]},"assertion":[{"value":"21 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"JSSPP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop on Job Scheduling Strategies for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"San Francisco","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 May 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 May 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"jsspp2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}