{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T17:06:22Z","timestamp":1774631182170,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T00:00:00Z","timestamp":1725408000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,9,4]]},"DOI":"10.1145\/3678015.3680479","type":"proceedings-article","created":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T12:19:32Z","timestamp":1724933972000},"page":"95-101","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Seraph: A Performance-Cost Aware Tuner for Training Reinforcement Learning Model on Serverless Computing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9240-8579","authenticated-orcid":false,"given":"Jinbo","family":"Han","sequence":"first","affiliation":[{"name":"Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao, Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4983-6047","authenticated-orcid":false,"given":"Xingda","family":"Wei","sequence":"additional","affiliation":[{"name":"Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao, Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6115-8130","authenticated-orcid":false,"given":"Rong","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao, Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9720-0361","authenticated-orcid":false,"given":"Haibo","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao, Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,9,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1613\/JAIR.3912"},{"key":"e_1_3_2_1_2_1","volume-title":"Retrieved","author":"Catto Erin","year":"2020","unstructured":"Erin Catto. 2020. Box2D 2.4.1 A 2D physics engine for games. Retrieved May 28, 2024 from https:\/\/box2d.org\/documentation\/"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01442131"},{"key":"e_1_3_2_1_4_1","volume-title":"AdaBatch: Adaptive Batch Sizes for Training Deep Neural Networks. CoRR abs\/1712.02029","author":"Devarakonda Aditya","year":"2017","unstructured":"Aditya Devarakonda, Maxim Naumov, and Michael Garland. 2017. AdaBatch: Adaptive Batch Sizes for Training Deep Neural Networks. CoRR abs\/1712.02029 (2017). arXiv:1712.02029 http:\/\/arxiv.org\/abs\/1712.02029"},{"key":"e_1_3_2_1_5_1","volume-title":"Hyperparameters in Reinforcement Learning and How To Tune Them. In International Conference on Machine Learning, ICML 2023","volume":"9149","author":"Eimer Theresa","year":"2023","unstructured":"Theresa Eimer, Marius Lindauer, and Roberta Raileanu. 2023. Hyperparameters in Reinforcement Learning and How To Tune Them. In International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA (Proceedings of Machine Learning Research, Vol. 202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 9104--9149. https:\/\/proceedings.mlr.press\/v202\/eimer23a.html"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan","author":"Espeholt Lasse","year":"2018","unstructured":"Lasse Espeholt, Hubert Soyer, R\u00e9mi Munos, Karen Simonyan, Volodymyr Mnih, Tom Ward, Yotam Doron, Vlad Firoiu, Tim Harley, Iain Dunning, Shane Legg, and Koray Kavukcuoglu. 2018. IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures. In Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018 (Proceedings of Machine Learning Research, Vol. 80), Jennifer G. Dy and Andreas Krause (Eds.). PMLR, 1406--1415. http:\/\/proceedings.mlr.press\/v80\/espeholt18a.html"},{"key":"e_1_3_2_1_7_1","volume-title":"8th International Conference on Learning Representations, ICLR 2020","author":"Ilyas Andrew","year":"2020","unstructured":"Andrew Ilyas, Logan Engstrom, Shibani Santurkar, Dimitris Tsipras, Firdaus Janoos, Larry Rudolph, and Aleksander Madry. 2020. A Closer Look at Deep Policy Gradients. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020. OpenReview.net. https:\/\/openreview.net\/forum?id=ryxdEkHtPS"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3603269.3604816"},{"key":"e_1_3_2_1_9_1","volume-title":"Hyperparameter Tuning for Deep Reinforcement Learning Applications. CoRR abs\/2201.11182","author":"Kiran Mariam","year":"2022","unstructured":"Mariam Kiran and Buse Melis \u00d6zyildirim. 2022. Hyperparameter Tuning for Deep Reinforcement Learning Applications. CoRR abs\/2201.11182 (2022). arXiv:2201.11182 https:\/\/arxiv.org\/abs\/2201.11182"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan","author":"Liang Eric","year":"2018","unstructured":"Eric Liang, Richard Liaw, Robert Nishihara, Philipp Moritz, Roy Fox, Ken Goldberg, Joseph Gonzalez, Michael I. Jordan, and Ion Stoica. 2018. RLlib: Abstractions for Distributed Reinforcement Learning. In Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10-15, 2018 (Proceedings of Machine Learning Research, Vol. 80), Jennifer G. Dy and Andreas Krause (Eds.). PMLR, 3059--3068. http:\/\/proceedings.mlr.press\/v80\/liang18b.html"},{"key":"e_1_3_2_1_11_1","volume-title":"Tune: A Research Platform for Distributed Model Selection and Training. arXiv:1807.05118 [cs.LG] https:\/\/arxiv.org\/abs\/1807.05118","author":"Liaw Richard","year":"2018","unstructured":"Richard Liaw, Eric Liang, Robert Nishihara, Philipp Moritz, Joseph E. Gonzalez, and Ion Stoica. 2018. Tune: A Research Platform for Distributed Model Selection and Training. arXiv:1807.05118 [cs.LG] https:\/\/arxiv.org\/abs\/1807.05118"},{"key":"e_1_3_2_1_12_1","volume-title":"An Empirical Model of Large-Batch Training. CoRR abs\/1812.06162","author":"McCandlish Sam","year":"2018","unstructured":"Sam McCandlish, Jared Kaplan, Dario Amodei, and OpenAI Dota Team. 2018. An Empirical Model of Large-Batch Training. CoRR abs\/1812.06162 (2018). arXiv:1812.06162 http:\/\/arxiv.org\/abs\/1812.06162"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the 33nd International Conference on Machine Learning, ICML 2016, New York City, NY, USA, June 19-24, 2016 (JMLR Workshop and Conference Proceedings","volume":"1937","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adri\u00e0 Puigdom\u00e8nech Badia, Mehdi Mirza, Alex Graves, Timothy P. Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous Methods for Deep Reinforcement Learning. In Proceedings of the 33nd International Conference on Machine Learning, ICML 2016, New York City, NY, USA, June 19-24, 2016 (JMLR Workshop and Conference Proceedings, Vol. 48), Maria-Florina Balcan and Kilian Q. Weinberger (Eds.). JMLR.org, 1928--1937. http:\/\/proceedings.mlr.press\/v48\/mniha16.html"},{"key":"e_1_3_2_1_14_1","volume-title":"Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. CoRR abs\/1707.06347 (2017). arXiv:1707.06347 http:\/\/arxiv.org\/abs\/1707.06347"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1038\/NATURE16961"},{"key":"e_1_3_2_1_16_1","volume-title":"Cliff Diving: Exploring Reward Surfaces in Reinforcement Learning Environments. In International Conference on Machine Learning, ICML 2022","volume":"20776","author":"Sullivan Ryan","year":"2022","unstructured":"Ryan Sullivan, Jordan K. Terry, Benjamin Black, and John P. Dickerson. 2022. Cliff Diving: Exploring Reward Surfaces in Reinforcement Learning Environments. In International Conference on Machine Learning, ICML 2022, 17-23 July 2022, Baltimore, Maryland, USA (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesv\u00e1ri, Gang Niu, and Sivan Sabato (Eds.). PMLR, 20744--20776. https:\/\/proceedings.mlr.press\/v162\/sullivan22a.html"},{"key":"e_1_3_2_1_17_1","volume-title":"Barto","author":"Sutton Richard S.","year":"1998","unstructured":"Richard S. Sutton and Andrew G. Barto. 1998. Reinforcement learning - an introduction. MIT Press. https:\/\/www.worldcat.org\/oclc\/37293240"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737391"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V38I15.29592"},{"key":"e_1_3_2_1_21_1","volume-title":"Jolteon: Unleashing the Promise of Serverless for Serverless Workflows. In 21st USENIX Symposium on Networked Systems Design and Implementation, NSDI 2024","author":"Zhang Zili","year":"2024","unstructured":"Zili Zhang, Chao Jin, and Xin Jin. 2024. Jolteon: Unleashing the Promise of Serverless for Serverless Workflows. In 21st USENIX Symposium on Networked Systems Design and Implementation, NSDI 2024, Santa Clara, CA, April 15-17, 2024, Laurent Vanbever and Irene Zhang (Eds.). USENIX Association, 167--183. https:\/\/www.usenix.org\/conference\/nsdi24\/presentation\/zhang-zili-jolteon"}],"event":{"name":"APSys '24: 15th ACM SIGOPS Asia-Pacific Workshop on Systems","location":"Kyoto Japan","acronym":"APSys '24","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 15th ACM SIGOPS Asia-Pacific Workshop on Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678015.3680479","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3678015.3680479","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T02:14:10Z","timestamp":1755915250000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3678015.3680479"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,4]]},"references-count":21,"alternative-id":["10.1145\/3678015.3680479","10.1145\/3678015"],"URL":"https:\/\/doi.org\/10.1145\/3678015.3680479","relation":{},"subject":[],"published":{"date-parts":[[2024,9,4]]},"assertion":[{"value":"2024-09-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}