{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T23:18:36Z","timestamp":1768346316472,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":91,"publisher":"ACM","funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2527416,2534241,2534286,2523997,2124897,2315614,2337914"],"award-info":[{"award-number":["2527416,2534241,2534286,2523997,2124897,2315614,2337914"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,19]]},"DOI":"10.1145\/3772052.3772227","type":"proceedings-article","created":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:19:00Z","timestamp":1768321140000},"page":"225-239","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-Agent Reinforcement Learning with Serverless Computing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-6919-8338","authenticated-orcid":false,"given":"Rui","family":"Wei","sequence":"first","affiliation":[{"name":"Stevens Institute of Technology, Hoboken, NJ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5790-4981","authenticated-orcid":false,"given":"Hanfei","family":"Yu","sequence":"additional","affiliation":[{"name":"Stevens Institute of Technology, Hoboken, NJ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0982-8062","authenticated-orcid":false,"given":"Xikang","family":"Song","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3642-3569","authenticated-orcid":false,"given":"Jian","family":"Li","sequence":"additional","affiliation":[{"name":"Stony Brook University, Stony Brook, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7253-2458","authenticated-orcid":false,"given":"Devesh","family":"Tiwari","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, MA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4484-4892","authenticated-orcid":false,"given":"Ying","family":"Mao","sequence":"additional","affiliation":[{"name":"Fordham University, New York, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1444-2657","authenticated-orcid":false,"given":"Hao","family":"Wang","sequence":"additional","affiliation":[{"name":"Stevens Institute of Technology, Hoboken, NJ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Akshat Agarwal Sumit Kumar and Katia Sycara. 2019. Learning Transferable Cooperative Behavior in Multi-Agent Teams. arXiv:1906.01202 [cs.LG] https:\/\/arxiv.org\/abs\/1906.01202"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1017\/pds.2021.17","article-title":"A multi-agent reinforcement learning framework for intelligent manufacturing with autonomous mobile robots","volume":"1","author":"Agrawal Akash","year":"2021","unstructured":"Akash Agrawal, Sung Jun Won, Tushar Sharma, Mayuri Deshpande, and Christopher McComb. 2021. A multi-agent reinforcement learning framework for intelligent manufacturing with autonomous mobile robots. Proceedings of the Design Society 1 (2021), 161\u2013170.","journal-title":"Proceedings of the Design Society"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.3390\/jpm12020309"},{"key":"e_1_3_2_1_4_1","unstructured":"Amazon Web Services. 2025. AWS Elastic Compute Cloud. https:\/\/aws.amazon.com\/ec2\/."},{"key":"e_1_3_2_1_5_1","unstructured":"Amazon Web Services. 2025. AWS Lambda pricing rules. https:\/\/aws.amazon.com\/lambda\/pricing\/."},{"key":"e_1_3_2_1_6_1","unstructured":"Apache. 2018. Apache OpenWhisk Official Website. https:\/\/openwhisk.apache.org."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","first-page":"1443","DOI":"10.1038\/s41467-022-28957-7","article-title":"Scientific multi-agent reinforcement learning for wall-models of turbulent flows","volume":"13","author":"Jane Bae H","year":"2022","unstructured":"H Jane Bae and Petros Koumoutsakos. 2022. Scientific multi-agent reinforcement learning for wall-models of turbulent flows. Nature Communications 13, 1 (2022), 1443.","journal-title":"Nature Communications"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the Fifteenth European Conference on Computer Systems (EuroSys).","author":"Cadden James","year":"2020","unstructured":"James Cadden, Thomas Unger, Yara Awad, Han Dong, Orran Krieger, and Jonathan Appavoo. 2020. SEUSS: Skip Redundant Paths to Make Serverless Fast. In Proceedings of the Fifteenth European Conference on Computer Systems (EuroSys)."},{"key":"e_1_3_2_1_10_1","volume-title":"Albrecht","author":"Christianos Filippos","year":"2021","unstructured":"Filippos Christianos, Georgios Papoudakis, Arrasy Rahman, and Stefano V. Albrecht. 2021. Scaling Multi-Agent Reinforcement Learning with Selective Parameter Sharing. ArXiv abs\/2102.07475 (2021). https:\/\/api.semanticscholar.org\/CorpusID:231924963"},{"key":"e_1_3_2_1_11_1","unstructured":"Cloud Native Computing Foundation. 2018. Knative. https:\/\/knative.dev\/docs\/."},{"key":"e_1_3_2_1_12_1","unstructured":"Christian Schroeder de Witt Tarun Gupta Denys Makoviichuk Viktor Makoviychuk Philip H. S. Torr Mingfei Sun and Shimon Whiteson. 2020. Is Independent Learning All You Need in the StarCraft Multi-Agent Challenge? arXiv:2011.09533 [cs.AI] https:\/\/arxiv.org\/abs\/2011.09533"},{"key":"e_1_3_2_1_13_1","volume-title":"AdaBatch: Adaptive Batch Sizes for Training Deep Neural Networks. CoRR abs\/1712.02029","author":"Devarakonda Aditya","year":"2017","unstructured":"Aditya Devarakonda, Maxim Naumov, and Michael Garland. 2017. AdaBatch: Adaptive Batch Sizes for Training Deep Neural Networks. CoRR abs\/1712.02029 (2017). arXiv:1712.02029 http:\/\/arxiv.org\/abs\/1712.02029"},{"key":"e_1_3_2_1_14_1","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track. https:\/\/openreview.net\/forum?id=5OjLGiJW3u","author":"Ellis Benjamin","year":"2023","unstructured":"Benjamin Ellis, Jonathan Cook, Skander Moalla, Mikayel Samvelyan, Mingfei Sun, Anuj Mahajan, Jakob Nicolaus Foerster, and Shimon Whiteson. 2023. SMACv2: An Improved Benchmark for Cooperative Multi-Agent Reinforcement Learning. In Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track. https:\/\/openreview.net\/forum?id=5OjLGiJW3u"},{"key":"e_1_3_2_1_15_1","volume-title":"SEED RL: Scalable and Efficient Deep-RL with Accelerated Central Inference. arXiv:1910.06591 [cs.LG] https:\/\/arxiv.org\/abs\/1910.06591","author":"Espeholt Lasse","year":"2020","unstructured":"Lasse Espeholt, Rapha\u00ebl Marinier, Piotr Stanczyk, Ke Wang, and Marcin Michalski. 2020. SEED RL: Scalable and Efficient Deep-RL with Accelerated Central Inference. arXiv:1910.06591 [cs.LG] https:\/\/arxiv.org\/abs\/1910.06591"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the 35th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"1416","author":"Espeholt Lasse","year":"2018","unstructured":"Lasse Espeholt, Hubert Soyer, Remi Munos, Karen Simonyan, Vlad Mnih, Tom Ward, Yotam Doron, Vlad Firoiu, Tim Harley, Iain Dunning, Shane Legg, and Koray Kavukcuoglu. 2018. IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures. In Proceedings of the 35th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 80), Jennifer Dy and Andreas Krause (Eds.). PMLR, 1407\u20131416. https:\/\/proceedings.mlr.press\/v80\/espeholt18a.html"},{"key":"e_1_3_2_1_17_1","unstructured":"Jakob Foerster Nantas Nardelli Gregory Farquhar Triantafyllos Afouras Philip H. S. Torr Pushmeet Kohli and Shimon Whiteson. 2018. Stabilising Experience Replay for Deep Multi-Agent Reinforcement Learning. arXiv:1702.08887 [cs.AI] https:\/\/arxiv.org\/abs\/1702.08887"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157096.3157336"},{"key":"e_1_3_2_1_19_1","unstructured":"Google Cloud Platform. 2025. GCP Compute Engine Instance. https:\/\/cloud.google.com\/compute\/docs\/instances."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2023.103905"},{"key":"e_1_3_2_1_21_1","volume-title":"Cooperative Multi-agent Control Using Deep Reinforcement Learning","author":"Gupta Jayesh K.","unstructured":"Jayesh K. Gupta, Maxim Egorov, and Mykel Kochenderfer. 2017. Cooperative Multi-agent Control Using Deep Reinforcement Learning. In Autonomous Agents and Multiagent Systems, Gita Sukthankar and Juan A. Rodriguez-Aguilar (Eds.). Springer International Publishing, Cham, 66\u201383."},{"key":"e_1_3_2_1_22_1","volume-title":"Soft Actor-Critic Algorithms and Applications. CoRR abs\/1812.05905","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Kristian Hartikainen, George Tucker, Sehoon Ha, Jie Tan, Vikash Kumar, Henry Zhu, Abhishek Gupta, Pieter Abbeel, and Sergey Levine. 2018. Soft Actor-Critic Algorithms and Applications. CoRR abs\/1812.05905 (2018). arXiv:1812.05905 http:\/\/arxiv.org\/abs\/1812.05905"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3678015.3680479"},{"key":"e_1_3_2_1_24_1","unstructured":"Pablo Hernandez-Leal Michael Kaisers Tim Baarslag and Enrique Munoz de Cote. 2019. A Survey of Learning in Multiagent Environments: Dealing with Non-Stationarity. arXiv:1707.09183 [cs.MA] https:\/\/arxiv.org\/abs\/1707.09183"},{"key":"e_1_3_2_1_25_1","unstructured":"Geoffrey Hinton Nish Srivastava and Kevin Swersky. 2017. Lecture 6e rmsprop: Divide the gradient by a running average of its recent magnitude. https:\/\/www.cs.toronto.edu\/~tijmen\/csc321\/slides\/lecture_slides_lec6.pdf Lecture slides CSC321."},{"key":"e_1_3_2_1_26_1","volume-title":"Foerster","author":"Hu Hengyuan","year":"2019","unstructured":"Hengyuan Hu and Jakob N. Foerster. 2019. Simplified Action Decoder for Deep Multi-Agent Reinforcement Learning. ArXiv abs\/1912.02288 (2019). https:\/\/api.semanticscholar.org\/CorpusID:208637067"},{"key":"e_1_3_2_1_27_1","volume-title":"Haibin Wu, and Shih wei Liao.","author":"Hu Jian","year":"2023","unstructured":"Jian Hu, Siyang Jiang, Seth Austin Harding, Haibin Wu, and Shih wei Liao. 2023. Rethinking the Implementation Tricks and Monotonicity Constraint in Cooperative Multi-Agent Reinforcement Learning. arXiv:2102.03479 [cs.LG] https:\/\/arxiv.org\/abs\/2102.03479"},{"key":"e_1_3_2_1_28_1","volume-title":"MARLlib: A Scalable and Efficient Multi-agent Reinforcement Learning Library. Journal of Machine Learning Research","author":"Hu Siyi","year":"2023","unstructured":"Siyi Hu, Yifan Zhong, Minquan Gao, Weixun Wang, Hao Dong, Xiaodan Liang, Zhihui Li, Xiaojun Chang, and Yaodong Yang. 2023. MARLlib: A Scalable and Efficient Multi-agent Reinforcement Learning Library. Journal of Machine Learning Research (2023)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/3635637.3662937"},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Kakade Sham","year":"2020","unstructured":"Sham Kakade and John Langford. 2020. A Closer Look at Deep Policy Gradients. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_31_1","volume-title":"Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A Method for Stochastic Optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"e_1_3_2_1_33_1","volume-title":"Trust Region Policy Optimisation in Multi-Agent Reinforcement Learning. ArXiv abs\/2109.11251","author":"Kuba Jakub Grudzien","year":"2021","unstructured":"Jakub Grudzien Kuba, Ruiqing Chen, Munning Wen, Ying Wen, Fanglei Sun, Jun Wang, and Yaodong Yang. 2021. Trust Region Policy Optimisation in Multi-Agent Reinforcement Learning. ArXiv abs\/2109.11251 (2021). https:\/\/api.semanticscholar.org\/CorpusID:237605219"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","first-page":"101884","DOI":"10.1016\/j.jocs.2022.101884","article-title":"Deep reinforcement learning for computational fluid dynamics on HPC systems","volume":"65","author":"Kurz Marius","year":"2022","unstructured":"Marius Kurz, Philipp Offenh\u00e4user, Dominic Viola, Oleksandr Shcherbakov, Michael Resch, and Andrea Beck. 2022. Deep reinforcement learning for computational fluid dynamics on HPC systems. Journal of Computational Science 65 (2022), 101884.","journal-title":"Journal of Computational Science"},{"key":"e_1_3_2_1_35_1","first-page":"1180","article-title":"Multi-agent graph reinforcement learning method for electric vehicle on-route charging guidance in coupled transportation electrification","volume":"15","author":"Li Yujing","year":"2023","unstructured":"Yujing Li, Su Su, Minghao Zhang, Qiujiang Liu, Xiaobo Nie, Mingchao Xia, and Dan D Micu. 2023. Multi-agent graph reinforcement learning method for electric vehicle on-route charging guidance in coupled transportation electrification. IEEE Transactions on Sustainable Energy 15, 2 (2023), 1180\u20131193.","journal-title":"IEEE Transactions on Sustainable Energy"},{"key":"e_1_3_2_1_36_1","unstructured":"Zijun Li Linsong Guo Quan Chen Jiagan Cheng Chuhao Xu Deze Zeng Zhuo Song Tao Ma Yong Yang Chao Li et al. 2022. Help Rather Than Recycle: Alleviating Cold Startup in Serverless Computing Through Inter-Function Container Sharing. In 2022 USENIX annual technical conference (USENIX ATC)."},{"key":"e_1_3_2_1_37_1","volume-title":"RLlib: Abstractions for Distributed Reinforcement Learning. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:49546141","author":"Liang Eric","year":"2017","unstructured":"Eric Liang, Richard Liaw, Robert Nishihara, Philipp Moritz, Roy Fox, Ken Goldberg, Joseph E. Gonzalez, Michael I. Jordan, and Ion Stoica. 2017. RLlib: Abstractions for Distributed Reinforcement Learning. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:49546141"},{"key":"e_1_3_2_1_38_1","unstructured":"Timothy P. Lillicrap Jonathan J. Hunt Alexander Pritzel Nicolas Heess Tom Erez Yuval Tassa David Silver and Daan Wierstra. 2019. Continuous control with deep reinforcement learning. arXiv:1509.02971 [cs.LG] https:\/\/arxiv.org\/abs\/1509.02971"},{"key":"e_1_3_2_1_39_1","volume-title":"Oh (Eds.)","volume":"35","author":"Liu Zongkai","year":"2022","unstructured":"Zongkai Liu, Chao Yu, Yaodong Yang, peng sun, Zifan Wu, and Yuan Li. 2022. A Unified Diversity Measure for Multiagent Reinforcement Learning. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 10339\u201310352. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/435cce71b4007699041dfffa4f034079-Paper-Conference.pdf"},{"key":"e_1_3_2_1_40_1","unstructured":"Ryan Lowe Yi Wu Aviv Tamar Jean Harb Pieter Abbeel and Igor Mordatch. 2017. Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments. Neural Information Processing Systems (NIPS)(2017)."},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS).","author":"Mahapatra Rohan","year":"2024","unstructured":"Rohan Mahapatra, Soroush Ghodrati, Byung Hoon Ahn, Sean Kinzer, Shu-Ting Wang, Hanyang Xu, Lavanya Karthikeyan, Hardik Sharma, Amir Yazdanbakhsh, Mohammad Alian, et al. 2024. In-storage Domain-Specific Acceleration for Serverless Computing. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)."},{"key":"e_1_3_2_1_42_1","volume-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Mahgoub Ashraf","year":"2022","unstructured":"Ashraf Mahgoub, Edgardo Barsallo Yi, Karthick Shankar, Sameh Elnikety, Somali Chaterji, and Saurabh Bagchi. 2022. ORION and the Three Rights: Sizing, Bundling, and Prewarming for Serverless DAGs. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, 303\u2013320. https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/mahgoub"},{"key":"e_1_3_2_1_43_1","volume-title":"An Empirical Model of Large-Batch Training. CoRR abs\/1812.06162","author":"McCandlish Sam","year":"2018","unstructured":"Sam McCandlish, Jared Kaplan, Dario Amodei, and OpenAI Dota Team. 2018. An Empirical Model of Large-Batch Training. CoRR abs\/1812.06162 (2018). arXiv:1812.06162 http:\/\/arxiv.org\/abs\/1812.06162"},{"key":"e_1_3_2_1_44_1","volume-title":"SRL: Scaling Distributed Reinforcement Learning to Over Ten Thousand Cores. arXiv:2306.16688 [cs.DC] https:\/\/arxiv.org\/abs\/2306.16688","author":"Mei Zhiyu","year":"2024","unstructured":"Zhiyu Mei, Wei Fu, Jiaxuan Gao, Guangju Wang, Huanchen Zhang, and Yi Wu. 2024. SRL: Scaling Distributed Reinforcement Learning to Over Ten Thousand Cores. arXiv:2306.16688 [cs.DC] https:\/\/arxiv.org\/abs\/2306.16688"},{"key":"e_1_3_2_1_45_1","volume-title":"Docker: Lightweight Linux Containers for Consistent Development and Deployment. Linux Journal","author":"Dirk Merkel","year":"2014","unstructured":"Dirk Merkel et al. 2014. Docker: Lightweight Linux Containers for Consistent Development and Deployment. Linux Journal (2014)."},{"key":"e_1_3_2_1_46_1","unstructured":"Microsoft Azure. 2025. Microsoft Azure Functions. https:\/\/azure.microsoft.com\/pricing\/details\/functions\/."},{"key":"e_1_3_2_1_47_1","unstructured":"Microsoft Azure. 2025. Microsoft Azure Virtual Machines. https:\/\/azure.microsoft.com\/pricing\/details\/virtual-machines\/."},{"key":"e_1_3_2_1_48_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_49_1","volume-title":"Mehdi Mirza, Alex Graves, Timothy P. Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu.","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adri\u00e0 Puigdom\u00e8nech Badia, Mehdi Mirza, Alex Graves, Timothy P. Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous Methods for Deep Reinforcement Learning. arXiv:1602.01783 [cs.LG] https:\/\/arxiv.org\/abs\/1602.01783"},{"key":"e_1_3_2_1_50_1","volume-title":"Ray: A Distributed Framework for Emerging AI Applications. arXiv:1712.05889 [cs.DC] https:\/\/arxiv.org\/abs\/1712.05889","author":"Moritz Philipp","year":"2018","unstructured":"Philipp Moritz, Robert Nishihara, Stephanie Wang, Alexey Tumanov, Richard Liaw, Eric Liang, Melih Elibol, Zongheng Yang, William Paul, Michael I. Jordan, and Ion Stoica. 2018. Ray: A Distributed Framework for Emerging AI Applications. arXiv:1712.05889 [cs.DC] https:\/\/arxiv.org\/abs\/1712.05889"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1038\/s42256-020-00272-0","article-title":"Automating turbulence modelling by multi-agent reinforcement learning","volume":"3","author":"Novati Guido","year":"2021","unstructured":"Guido Novati, Hugues Lascombes de Laroussilhe, and Petros Koumoutsakos. 2021. Automating turbulence modelling by multi-agent reinforcement learning. Nature Machine Intelligence 3, 1 (2021), 87\u201396.","journal-title":"Nature Machine Intelligence"},{"key":"e_1_3_2_1_52_1","volume-title":"SOCK: Rapid Task Provisioning with Serverless-Optimized Containers. In 2018 USENIX annual technical conference (USENIX ATC).","author":"Oakes Edward","year":"2018","unstructured":"Edward Oakes, Leon Yang, Dennis Zhou, Kevin Houck, Tyler Harter, Andrea Arpaci-Dusseau, and Remzi Arpaci-Dusseau. 2018. SOCK: Rapid Task Provisioning with Serverless-Optimized Containers. In 2018 USENIX annual technical conference (USENIX ATC)."},{"key":"e_1_3_2_1_53_1","volume-title":"Dealing with non-stationarity in multi-agent deep reinforcement learning. arXiv preprint arXiv:1906.04737","author":"Papoudakis Georgios","year":"2019","unstructured":"Georgios Papoudakis, Filippos Christianos, Arrasy Rahman, and Stefano V Albrecht. 2019. Dealing with non-stationarity in multi-agent deep reinforcement learning. arXiv preprint arXiv:1906.04737 (2019)."},{"key":"e_1_3_2_1_54_1","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks (NeurIPS). http:\/\/arxiv.org\/abs\/2006","author":"Papoudakis Georgios","unstructured":"Georgios Papoudakis, Filippos Christianos, Lukas Sch\u00e4fer, and Stefano V. Albrecht. 2021. Benchmarking Multi-Agent Deep Reinforcement Learning Algorithms in Cooperative Tasks. In Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks (NeurIPS). http:\/\/arxiv.org\/abs\/2006.07869"},{"key":"e_1_3_2_1_55_1","volume-title":"Ridge rider: Finding Diverse Solutions by Following Eigenvectors of the Hessian. Advances in Neural Information Processing Systems (NIPS)","author":"Parker-Holder Jack","year":"2020","unstructured":"Jack Parker-Holder, Luke Metz, Cinjon Resnick, Hengyuan Hu, Adam Lerer, Alistair Letcher, Alexander Peysakhovich, Aldo Pacchiano, and Jakob Foerster. 2020. Ridge rider: Finding Diverse Solutions by Following Eigenvectors of the Hessian. Advances in Neural Information Processing Systems (NIPS) (2020)."},{"key":"e_1_3_2_1_56_1","unstructured":"Python. 2008. CloudPickle \u2014 Extension of Pickle. https:\/\/pypi.org\/project\/cloudpickle\/."},{"key":"e_1_3_2_1_57_1","unstructured":"Python. 2008. Pickle \u2014 Python Object Serialization. https:\/\/docs.python.org\/3\/library\/pickle.html."},{"key":"e_1_3_2_1_58_1","unstructured":"PyTorch. 2018. PyTorch: Tensors and Dynamic Neural Networks in Python with Strong GPU Acceleration. https:\/\/pytorch.org."},{"key":"e_1_3_2_1_59_1","volume-title":"Gregory Farquhar, Jakob Foerster, and Shimon Whiteson.","author":"Rashid Tabish","year":"2018","unstructured":"Tabish Rashid, Mikayel Samvelyan, Christian Schroeder de Witt, Gregory Farquhar, Jakob Foerster, and Shimon Whiteson. 2018. QMLX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning. arXiv:1803.11485 [cs.LG] https:\/\/arxiv.org\/abs\/1803.11485"},{"key":"e_1_3_2_1_60_1","unstructured":"Redis. 2009. Redis Official Website. http:\/\/redis.io\/."},{"key":"e_1_3_2_1_61_1","volume-title":"Proceedings of the ACM Symposium on Cloud Computing (SoCC).","author":"Romero Francisco","year":"2021","unstructured":"Francisco Romero, Gohar Irfan Chaudhry, \u00cd\u00f1igo Goiri, Pragna Gopa, Paul Batum, Neeraja J Yadwadkar, Rodrigo Fonseca, Christos Kozyrakis, and Ricardo Bianchini. 2021. FaaT: A Transparent Auto-Scaling Cache for Serverless Applications. In Proceedings of the ACM Symposium on Cloud Computing (SoCC)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3486972"},{"key":"e_1_3_2_1_63_1","volume-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention. Springer, 640\u2013649","author":"Sahoo Pranab","year":"2024","unstructured":"Pranab Sahoo, Ashutosh Tripathi, Sriparna Saha, and Samrat Mondal. 2024. Fedmrl: Data heterogeneity aware federated multi-agent deep reinforcement learning for medical imaging. In International Conference on Medical Image Computing and Computer-Assisted Intervention. Springer, 640\u2013649."},{"key":"e_1_3_2_1_64_1","volume-title":"Gregory Farquhar, Nantas Nardelli, Tim G. J. Rudner, Chia-Man Hung, Philiph H. S. Torr, Jakob Foerster, and Shimon Whiteson.","author":"Samvelyan Mikayel","year":"2019","unstructured":"Mikayel Samvelyan, Tabish Rashid, Christian Schroeder de Witt, Gregory Farquhar, Nantas Nardelli, Tim G. J. Rudner, Chia-Man Hung, Philiph H. S. Torr, Jakob Foerster, and Shimon Whiteson. 2019. The StarCraft Multi-Agent Challenge. CoRR abs\/1902.04043 (2019)."},{"key":"e_1_3_2_1_65_1","unstructured":"Tom Schaul John Quan Ioannis Antonoglou and David Silver. 2016. Prioritized Experience Replay. arXiv:1511.05952 [cs.LG] https:\/\/arxiv.org\/abs\/1511.05952"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636193"},{"key":"e_1_3_2_1_67_1","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. arXiv:1707.06347 [cs.LG] https:\/\/arxiv.org\/abs\/1707.06347"},{"key":"e_1_3_2_1_68_1","unstructured":"Mohammad Shahrad Rodrigo Fonseca Inigo Goiri Gohar Chaudhry Paul Batum Jason Cooke Eduardo Laureano Colby Tresness Mark Russinovich and Ricardo Bianchini. 2020. Serverless in the Wild: Characterizing and Optimizing the Serverless Workload at a Large Cloud Provider. In 2020 USENIX annual technical conference (USENIX ATC)."},{"key":"e_1_3_2_1_69_1","volume-title":"Adaptive Multi-Agent Deep Reinforcement Learning for Timely Healthcare Interventions. arXiv preprint arXiv:2309.10980","author":"Shaik Thanveer","year":"2023","unstructured":"Thanveer Shaik, Xiaohui Tao, Lin Li, Haoran Xie, Hong-Ning Dai, Feng Zhao, and Jianming Yong. 2023. Adaptive Multi-Agent Deep Reinforcement Learning for Timely Healthcare Interventions. arXiv preprint arXiv:2309.10980 (2023)."},{"key":"e_1_3_2_1_70_1","volume-title":"multi-agent, reinforcement learning for autonomous driving. arXiv preprint arXiv:1610.03295","author":"Shalev-Shwartz Shai","year":"2016","unstructured":"Shai Shalev-Shwartz, Shaked Shammah, and Amnon Shashua. 2016. Safe, multi-agent, reinforcement learning for autonomous driving. arXiv preprint arXiv:1610.03295 (2016)."},{"key":"e_1_3_2_1_71_1","volume-title":"Proceedings of the 2024 ACM Symposium on Cloud Computing (SoCC).","author":"Sui Yifan","year":"2024","unstructured":"Yifan Sui, Hanfei Yu, Yitao Hu, Jianxun Li, and Hao Wang. 2024. Pre-Warming is Not Enough: Accelerating Serverless Inference With Opportunistic Pre-Loading. In Proceedings of the 2024 ACM Symposium on Cloud Computing (SoCC)."},{"key":"e_1_3_2_1_72_1","unstructured":"Sainbayar Sukhbaatar Arthur Szlam and Rob Fergus. 2016. Learning Multiagent Communication with Backpropagation. arXiv:1605.07736 [cs.LG] https:\/\/arxiv.org\/abs\/1605.07736"},{"key":"e_1_3_2_1_73_1","volume-title":"Cliff Diving: Exploring Reward Surfaces in Reinforcement Learning Environments. In Nineteenth International Conference on Machine Learning (ICML).","author":"Sullivan Ryan","year":"2022","unstructured":"Ryan Sullivan, Justin K Terry, Benjamin Black, and John P Dickerson. 2022. Cliff Diving: Exploring Reward Surfaces in Reinforcement Learning Environments. In Nineteenth International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_74_1","volume-title":"Vinicius Zambaldi, Max Jaderberg, Marc Lanctot, Nicolas Sonnerat, Joel Z. Leibo, Karl Tuyls, and Thore Graepel.","author":"Sunehag Peter","year":"2017","unstructured":"Peter Sunehag, Guy Lever, Audrunas Gruslys, Wojciech Marian Czarnecki, Vinicius Zambaldi, Max Jaderberg, Marc Lanctot, Nicolas Sonnerat, Joel Z. Leibo, Karl Tuyls, and Thore Graepel. 2017. Value-Decomposition Networks For Cooperative Multi-Agent Learning. arXiv:1706.05296 [cs.AI] https:\/\/arxiv.org\/abs\/1706.05296"},{"key":"e_1_3_2_1_75_1","volume-title":"Proceedings of the 12th International Conference on Neural Information Processing Systems","author":"Sutton Richard S.","year":"1999","unstructured":"Richard S. Sutton, David McAllester, Satinder Singh, and Yishay Mansour. 1999. Policy gradient methods for reinforcement learning with function approximation. In Proceedings of the 12th International Conference on Neural Information Processing Systems (Denver, CO) (NIPS'99). MIT Press, Cambridge, MA, USA, 1057\u20131063."},{"key":"e_1_3_2_1_76_1","volume-title":"Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12","author":"Tampuu Ardi","year":"2015","unstructured":"Ardi Tampuu, Tambet Matiisen, Dorian Kodelja, Ilya Kuzovkin, Kristjan Korjus, Juhan Aru, Jaan Aru, and Raul Vicente. 2015. Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12 (2015). https:\/\/api.semanticscholar.org\/CorpusID:12046082"},{"key":"e_1_3_2_1_77_1","volume-title":"International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:274281842","author":"Tan Ming","year":"1997","unstructured":"Ming Tan. 1997. Multi-Agent Reinforcement Learning: Independent versus Cooperative Agents. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:274281842"},{"key":"e_1_3_2_1_78_1","first-page":"15032","article-title":"Pettingzoo: Gym for multi-agent reinforcement learning","volume":"34","author":"Terry J","year":"2021","unstructured":"J Terry, Benjamin Black, Nathaniel Grammel, Mario Jayakumar, Ananth Hari, Ryan Sullivan, Luis S Santos, Clemens Dieffendahl, Caroline Horsch, Rodrigo Perez-Vicente, et al. 2021. Pettingzoo: Gym for multi-agent reinforcement learning. Advances in Neural Information Processing Systems 34 (2021), 15032\u201315043.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_79_1","volume-title":"Tristan Deleu, Manuel Goul\u00e3o, Andreas Kallinteris, Markus Krimmel, Arjun KG, Rodrigo Perez-Vicente, Andrea Pierr\u00e9, Sander Schulhoff, Jun Jet Tai, Hannah Tan, and Omar G. Younis.","author":"Towers Mark","year":"2024","unstructured":"Mark Towers, Ariel Kwiatkowski, Jordan Terry, John U. Balis, Gianluca De Cola, Tristan Deleu, Manuel Goul\u00e3o, Andreas Kallinteris, Markus Krimmel, Arjun KG, Rodrigo Perez-Vicente, Andrea Pierr\u00e9, Sander Schulhoff, Jun Jet Tai, Hannah Tan, and Omar G. Younis. 2024. Gymnasium: A Standard Interface for Reinforcement Learning Environments. arXiv:2407.17032 [cs.LG] https:\/\/arxiv.org\/abs\/2407.17032"},{"key":"e_1_3_2_1_80_1","unstructured":"Nicholas Ustaran-Anderegg Michael Pratt and Jaime Sabal-Bermudez. [n. d.]. AgileRL. https:\/\/github.com\/AgileRL\/AgileRL"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"crossref","unstructured":"Zhewei Yao Amir Gholami Kurt Keutzer and Michael Mahoney. 2020. PyHessian: Neural Networks Through the Lens of the Hessian. arXiv:1912.07145 [cs.LG] https:\/\/arxiv.org\/abs\/1912.07145","DOI":"10.1109\/BigData50022.2020.9378171"},{"key":"e_1_3_2_1_82_1","volume-title":"The Surprising Effectiveness of PPO in Cooperative Multi-Agent Games. In Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track.","author":"Yu Chao","year":"2022","unstructured":"Chao Yu, Akash Velu, Eugene Vinitsky, Jiaxuan Gao, Yu Wang, Alexandre Bayen, and Yi Wu. 2022. The Surprising Effectiveness of PPO in Cooperative Multi-Agent Games. In Thirty-sixth Conference on Neural Information Processing Systems Datasets and Benchmarks Track."},{"key":"e_1_3_2_1_83_1","volume-title":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS).","author":"Yu Hanfei","year":"2024","unstructured":"Hanfei Yu, Rohan Basu Roy, Christian Fontenot, Devesh Tiwari, Jian Li, Hong Zhang, Hao Wang, and Seung-Jong Park. 2024. RainbowCake: Mitigating Coldstarts in Serverless with Layer-wise Container Caching and Sharing. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)."},{"key":"e_1_3_2_1_84_1","volume-title":"Nitro: Boosting Distributed Reinforcement Learning with Serverless Computing. In 51st International Conference on Very Large Data Bases (VLDB).","author":"Yu Hanfei","year":"2025","unstructured":"Hanfei Yu, Jacob Carter, Hao Wang, Devesh Tiwari, Jian Li, and Seung-Jong Park. 2025. Nitro: Boosting Distributed Reinforcement Learning with Serverless Computing. In 51st International Conference on Very Large Data Bases (VLDB)."},{"key":"e_1_3_2_1_85_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence.","author":"Yu Hanfei","year":"2024","unstructured":"Hanfei Yu, Jian Li, Yang Hua, Xu Yuan, and Hao Wang. 2024. Cheaper and Faster: Distributed Deep Reinforcement Learning with Serverless Computing. In Proceedings of the AAAI Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00045"},{"key":"e_1_3_2_1_87_1","volume-title":"Levine (Eds.)","volume":"36","author":"Zang Yifan","year":"2023","unstructured":"Yifan Zang, Jinmin He, Kai Li, Haobo Fu, Qiang Fu, Junliang Xing, and Jian Cheng. 2023. Automatic Grouping for Efficient Cooperative Multi-Agent Reinforcement Learning. In Advances in Neural Information Processing Systems, A. Oh, T. Naumann, A. Globerson, K. Saenko, M. Hardt, and S. Levine (Eds.), Vol. 36. Curran Associates, Inc., 46105\u201346121. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/906c860f1b7515a8ffec02dcdac74048-Paper-Conference.pdf"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449934"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"crossref","first-page":"6290","DOI":"10.1109\/TKDE.2022.3178819","article-title":"RLCharge: Imitative multi-agent spatiotemporal reinforcement learning for electric vehicle charging station recommendation","volume":"35","author":"Zhang Weijia","year":"2022","unstructured":"Weijia Zhang, Hao Liu, Hui Xiong, Tong Xu, Fan Wang, Haoran Xin, and Hua Wu. 2022. RLCharge: Imitative multi-agent spatiotemporal reinforcement learning for electric vehicle charging station recommendation. IEEE Transactions on Knowledge and Data Engineering 35, 6 (2022), 6290\u20136304.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"e_1_3_2_1_90_1","first-page":"1","article-title":"MALib: A Parallel Framework for Population-based Multi-agent Reinforcement Learning","volume":"24","author":"Zhou Ming","year":"2023","unstructured":"Ming Zhou, Ziyu Wan, Hanjing Wang, Muning Wen, Runzhe Wu, Ying Wen, Yaodong Yang, Yong Yu, Jun Wang, and Weinan Zhang. 2023. MALib: A Parallel Framework for Population-based Multi-agent Reinforcement Learning. Journal of Machine Learning Research 24, 150 (2023), 1\u201312. http:\/\/jmlr.org\/papers\/v24\/22-0169.html","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_91_1","volume-title":"2020 IEEE High Performance Extreme Computing Conference (HPEC). IEEE, 1\u20139.","author":"Zhou Yutai","year":"2020","unstructured":"Yutai Zhou, Shawn Manuel, Peter Morales, Sheng Li, Jaime Pena, and Ross Allen. 2020. Towards a distributed framework for multi-agent reinforcement learning research. In 2020 IEEE High Performance Extreme Computing Conference (HPEC). IEEE, 1\u20139."}],"event":{"name":"SoCC '25: ACM Symposium on Cloud Computing","location":"Online USA","acronym":"SoCC '25","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2025 ACM Symposium on Cloud Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772052.3772227","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T16:21:12Z","timestamp":1768321272000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772052.3772227"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":91,"alternative-id":["10.1145\/3772052.3772227","10.1145\/3772052"],"URL":"https:\/\/doi.org\/10.1145\/3772052.3772227","relation":{},"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"2026-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}