{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:36:03Z","timestamp":1775666163449,"version":"3.50.1"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030882235","type":"print"},{"value":"9783030882242","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-88224-2_7","type":"book-chapter","created":{"date-parts":[[2021,10,5]],"date-time":"2021-10-05T18:38:17Z","timestamp":1633459097000},"page":"126-148","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["A HPC Co-scheduler with Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6952-1195","authenticated-orcid":false,"given":"Abel","family":"Souza","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8486-0897","authenticated-orcid":false,"given":"Kristiaan","family":"Pelckmans","sequence":"additional","affiliation":[]},{"given":"Johan","family":"Tordsson","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,6]]},"reference":[{"key":"7_CR1","unstructured":"ARCHER User Survey (2019). https:\/\/www.archer.ac.uk\/about-archer\/reports\/annual\/2019\/ARCHER_UserSurvey2019_Report.pdf"},{"key":"7_CR2","unstructured":"Abel, N.H.: Abel inequality - encyclopedia of mathematics (2021). https:\/\/encyclopediaofmath.org\/index.php?title=Abel_inequality&oldid=18342"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Ahn, D.H., Garlick, J., Grondona, M., Lipari, D., Springmeyer, B., Schulz, M.: Flux: a next-generation resource management framework for large HPC centers. In: 43rd International Conference on Parallel Processing Workshops. IEEE (2014)","DOI":"10.1109\/ICPPW.2014.15"},{"key":"7_CR4","doi-asserted-by":"publisher","first-page":"3389","DOI":"10.1093\/nar\/25.17.3389","volume":"25","author":"SF Altschul","year":"1997","unstructured":"Altschul, S.F., et al.: Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res. 25, 3389\u20133402 (1997)","journal-title":"Nucleic Acids Res."},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Ambati, P., Bashir, N., Irwin, D., Shenoy, P.: Waiting game: optimally provisioning fixed resources for cloud-enabled schedulers. In: International Conference for High Performance Computing, Networking, Storage and Analysis (2020)","DOI":"10.1109\/SC41405.2020.00071"},{"key":"7_CR6","unstructured":"Berriman, G., et al.: Montage: a grid enabled image mosaic service for the national virtual observatory. In: Astronomical Data Analysis Software and Systems (ADASS) XIII (2004)"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Burns, B., Grant, B., Oppenheimer, D., Brewer, E., Wilkes, J.: Borg, Omega, and Kubernetes. Queue (2016)","DOI":"10.1145\/2898442.2898444"},{"key":"7_CR8","doi-asserted-by":"crossref","unstructured":"Carastan-Santos, D., De Camargo, R.Y.: Obtaining dynamic scheduling policies with simulation and machine learning. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (2017)","DOI":"10.1145\/3126908.3126955"},{"key":"7_CR9","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1016\/j.parco.2018.08.002","volume":"79","author":"RH Castain","year":"2018","unstructured":"Castain, R.H., Hursey, J., Bouteiller, A., Solt, D.: PMIx: process management for exascale environments. Parallel Comput. 79, 9\u201329 (2018)","journal-title":"Parallel Comput."},{"key":"7_CR10","doi-asserted-by":"crossref","unstructured":"Cirne, W., Berman, F.: A comprehensive model of the supercomputer workload. In: Proceedings of the Fourth Annual IEEE International Workshop on Workload Characterization. WWC-4 (Cat. No. 01EX538), pp. 140\u2013148. IEEE (2001)","DOI":"10.1109\/WWC.2001.990753"},{"key":"7_CR11","unstructured":"Domeniconi, G., Lee, E.K., Venkataswamy, V., Dola, S.: Cush: cognitive scheduler for heterogeneous high performance computing system. In: Workshop on Deep Reinforcement Learning for Knowledge Discover, DRL4KDD 2019 (2019)"},{"key":"7_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/BFb0022289","volume-title":"Job Scheduling Strategies for Parallel Processing","author":"DG Feitelson","year":"1996","unstructured":"Feitelson, D.G.: Packing schemes for gang scheduling. In: Feitelson, D.G., Rudolph, L. (eds.) JSSPP 1996. LNCS, vol. 1162, pp. 89\u2013110. Springer, Heidelberg (1996). https:\/\/doi.org\/10.1007\/BFb0022289"},{"key":"7_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/BFb0022284","volume-title":"Job Scheduling Strategies for Parallel Processing","author":"DG Feitelson","year":"1996","unstructured":"Feitelson, D.G., Rudolph, L.: Toward convergence in job schedulers for parallel supercomputers. In: Feitelson, D.G., Rudolph, L. (eds.) JSSPP 1996. LNCS, vol. 1162, pp. 1\u201326. Springer, Heidelberg (1996). https:\/\/doi.org\/10.1007\/BFb0022284"},{"key":"7_CR14","doi-asserted-by":"publisher","first-page":"2967","DOI":"10.1016\/j.jpdc.2014.06.013","volume":"74","author":"DG Feitelson","year":"2014","unstructured":"Feitelson, D.G., Tsafrir, D., Krakov, D.: Experience with using the parallel workloads archive. J. Parallel Distrib. Comput. 74, 2967\u20132982 (2014)","journal-title":"J. Parallel Distrib. Comput."},{"key":"7_CR15","doi-asserted-by":"publisher","unstructured":"Gainaru, A., Aupy, G.P., Sun, H., Raghavan, P.: Speculative scheduling for stochastic HPC applications. In: Proceedings of the 48th International Conference on Parallel Processing, ICPP 2019, pp. 32:1\u201332:10. ACM (2019). https:\/\/doi.org\/10.1145\/3337821.3337890","DOI":"10.1145\/3337821.3337890"},{"key":"7_CR16","unstructured":"Ghodsi, A., Zaharia, M., Hindman, B., Konwinski, A., Shenker, S., Stoica, I.: Dominant resource fairness: fair allocation of multiple resource types. In: USENIX Symposium on Networked Systems Design and Implementation (2011)"},{"key":"7_CR17","unstructured":"Hindman, B., et al.: Mesos: a platform for fine-grained resource sharing in the data center (2011)"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Janus, P., Rzadca, K.: SLO-aware colocation of data center tasks based on instantaneous processor requirements. arXiv preprint arXiv:1709.01384 (2017)","DOI":"10.1145\/3127479.3132244"},{"key":"7_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1007\/10968987_3","volume-title":"Job Scheduling Strategies for Parallel Processing","author":"AB Yoo","year":"2003","unstructured":"Yoo, A.B., Jette, M.A., Grondona, M.: SLURM: simple Linux utility for resource management. In: Feitelson, D., Rudolph, L., Schwiegelshohn, U. (eds.) JSSPP 2003. LNCS, vol. 2862, pp. 44\u201360. Springer, Heidelberg (2003). https:\/\/doi.org\/10.1007\/10968987_3"},{"key":"7_CR20","unstructured":"Kolter, J.Z., Johnson, M.J.: REDD: a public data set for energy disaggregation research. In: Workshop on Data Mining Applications in Sustainability (SIGKDD), San Diego, CA (2011)"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Lakew, E.B., Klein, C., Hernandez-Rodriguez, F., Elmroth, E.: Performance-based service differentiation in clouds. In: 2015 15th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing. IEEE (2015)","DOI":"10.1109\/CCGrid.2015.145"},{"key":"7_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3375714","volume":"17","author":"Y Li","year":"2020","unstructured":"Li, Y., Sun, D., Lee, B.C.: Dynamic colocation policies with reinforcement learning. ACM Trans. Architect. Code Optim. (TACO) 17, 1\u201325 (2020)","journal-title":"ACM Trans. Architect. Code Optim. (TACO)"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Li, Y., Tang, X., Cai, W.: On dynamic bin packing for resource allocation in the cloud. In: Proceedings of the 26th ACM Symposium on Parallelism in Algorithms and Architectures. ACM (2014)","DOI":"10.1145\/2612669.2612675"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Lifka, D.: The ANL\/IBM SP scheduling system. In: Job Scheduling Strategies for Parallel Processing. IEEE (1995)","DOI":"10.1007\/3-540-60153-8_35"},{"key":"7_CR25","doi-asserted-by":"crossref","unstructured":"Mao, H., Schwarzkopf, M., Venkatakrishnan, S.B., Meng, Z., Alizadeh, M.: Learning scheduling algorithms for data processing clusters. In: ACM Special Interest Group on Data Communication (2019)","DOI":"10.1145\/3341302.3342080"},{"key":"7_CR26","doi-asserted-by":"crossref","unstructured":"Mell, P., Grance, T., et al.: The NIST definition of cloud computing (2011)","DOI":"10.6028\/NIST.SP.800-145"},{"key":"7_CR27","unstructured":"Menage, P.B.: Adding generic process containers to the Linux kernel. In: Proceedings of the Linux Symposium. Citeseer (2007)"},{"key":"7_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1287\/mnsc.28.1.1","volume":"28","author":"GE Monahan","year":"1982","unstructured":"Monahan, G.E.: State of the art - a survey of partially observable Markov decision processes: theory, models, and algorithms. Manag. Sci. 28, 1\u201316 (1982)","journal-title":"Manag. Sci."},{"key":"7_CR29","doi-asserted-by":"crossref","unstructured":"Moradi, H., Wang, W., Fernandez, A., Zhu, D.: uPredict: a user-level profiler-based predictive framework in multi-tenant clouds. In: 2020 IEEE International Conference on Cloud Engineering (IC2E). IEEE (2020)","DOI":"10.1109\/IC2E48712.2020.00015"},{"key":"7_CR30","doi-asserted-by":"crossref","unstructured":"Patel, T., Tiwari, D.: CLITE: efficient and QoS-aware co-location of multiple latency-critical jobs for warehouse scale computers. In: 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE (2020)","DOI":"10.1109\/HPCA47549.2020.00025"},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Reiss, C., Tumanov, A., Ganger, G.R., Katz, R.H., Kozuch, M.A.: Heterogeneity and dynamicity of clouds at scale: Google trace analysis. In: Proceedings of the Third ACM Symposium on Cloud Computing (2012)","DOI":"10.1145\/2391229.2391236"},{"key":"7_CR32","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.jpdc.2017.06.009","volume":"111","author":"A Reuther","year":"2018","unstructured":"Reuther, A., et al.: Scalable system scheduling for HPC and big data. J. Parallel Distrib. Comput. 111, 76\u201392 (2018)","journal-title":"J. Parallel Distrib. Comput."},{"key":"7_CR33","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1007\/978-3-319-57972-6_4","volume-title":"High Performance Computing","author":"N Rocchetti","year":"2017","unstructured":"Rocchetti, N., Da Silva, M., Nesmachnow, S., Tchernykh, A.: Penalty scheduling policy applying user estimates and aging for supercomputing centers. In: Barrios Hern\u00e1ndez, C.J., Gitler, I., Klapp, J. (eds.) CARLA 2016. CCIS, vol. 697, pp. 49\u201360. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-57972-6_4"},{"key":"7_CR34","doi-asserted-by":"crossref","unstructured":"Rodrigo \u00c1lvarez, G.P., \u00d6stberg, P.O., Elmroth, E., Ramakrishnan, L.: A2L2: an application aware flexible HPC scheduling model for low-latency allocation. In: Proceedings of the 8th International Workshop on Virtualization Technologies in Distributed Computing. ACM (2015)","DOI":"10.1145\/2755979.2755983"},{"key":"7_CR35","unstructured":"Rustad, E.: Numascale: Numaconnect (2013). https:\/\/www.numascale.com\/index.php\/numascale-whitepapers\/"},{"key":"7_CR36","doi-asserted-by":"crossref","unstructured":"Souza, A., Pelckmans, K., Ghoshal, D., Ramakrishnan, L., Tordsson, J.: Asa - the adaptive scheduling architecture. In: The 29th International Symposium on High-Performance Parallel and Distributed Computing. ACM (2020)","DOI":"10.1145\/3369583.3392693"},{"key":"7_CR37","doi-asserted-by":"crossref","unstructured":"Staples, G.: Torque resource manager. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC 2006. ACM (2006)","DOI":"10.1145\/1188455.1188464"},{"key":"7_CR38","doi-asserted-by":"crossref","unstructured":"Stevens, R., Taylor, V., Nichols, J., Maccabe, A.B., Yelick, K., Brown, D.: AI for science. Technical report, Argonne National Lab. (ANL), Argonne, IL (United States) (2020)","DOI":"10.2172\/1604756"},{"key":"7_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1007\/978-3-030-48340-1_40","volume-title":"Euro-Par 2019: Parallel Processing Workshops","author":"L Thamsen","year":"2020","unstructured":"Thamsen, L., et al.: Hugo: a cluster scheduler that efficiently learns to select complementary data-parallel jobs. In: Schwardmann, U., et al. (eds.) Euro-Par 2019. LNCS, vol. 11997, pp. 519\u2013530. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-48340-1_40"},{"key":"7_CR40","doi-asserted-by":"crossref","unstructured":"Tirmazi, M., et al.: Borg: the next generation. In: SIGOPS European Conference on Computer Systems (EuroSys 2020) (2020)","DOI":"10.1145\/3342195.3387517"},{"key":"7_CR41","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1007\/978-3-319-91446-6_47","volume-title":"Contemporary Complex Systems and Their Dependability","author":"M Uchro\u0144ski","year":"2019","unstructured":"Uchro\u0144ski, M., Bo\u017cejko, W., Krajewski, Z., Tykierko, M., Wodecki, M.: User estimates inaccuracy study in HPC scheduler. In: Zamojski, W., Mazurkiewicz, J., Sugier, J., Walkowiak, T., Kacprzyk, J. (eds.) DepCoS-RELCOMEX 2018. AISC, vol. 761, pp. 504\u2013514. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-319-91446-6_47"},{"key":"7_CR42","doi-asserted-by":"crossref","unstructured":"Yang, H., Breslow, A., Mars, J., Tang, L.: Bubble-flux: precise online QoS management for increased utilization in warehouse scale computers. In: ACM SIGARCH Computer Architecture News. ACM (2013)","DOI":"10.1145\/2485922.2485974"},{"key":"7_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, D., Dai, D., He, Y., Bao, F.S.: RLScheduler: learn to schedule HPC batch jobs using deep reinforcement learning. arXiv preprint arXiv:1910.08925 (2019)","DOI":"10.1109\/SC41405.2020.00035"}],"container-title":["Lecture Notes in Computer Science","Job Scheduling Strategies for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-88224-2_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T10:16:23Z","timestamp":1725876983000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-88224-2_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030882235","9783030882242"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-88224-2_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"6 October 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"JSSPP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop on Job Scheduling Strategies for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 May 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 May 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"jsspp2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/jsspp.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"59% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}