{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T21:55:08Z","timestamp":1757627708166,"version":"3.44.0"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031998713"},{"type":"electronic","value":"9783031998720"}],"license":[{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-99872-0_22","type":"book-chapter","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:08:44Z","timestamp":1755774524000},"page":"313-326","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Accelerating Independent Multi-Agent Reinforcement Learning on\u00a0Multi-GPU Platforms"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0069-8213","authenticated-orcid":false,"given":"Samuel","family":"Wiggins","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1455-0412","authenticated-orcid":false,"given":"Nikunj","family":"Gupta","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1476-2984","authenticated-orcid":false,"given":"Grace","family":"Zgheib","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1045-0019","authenticated-orcid":false,"given":"Mahesh A.","family":"Iyer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1609-8589","authenticated-orcid":false,"given":"Viktor","family":"Prasanna","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"22_CR1","doi-asserted-by":"crossref","unstructured":"Cardarilli, G.C., et al.: FPGA implementation of Q-RTS for real-time swarm intelligence systems. In: 2020 54th Asilomar Conference on Signals, Systems, and Computers, pp. 116\u2013120. IEEE (2020)","DOI":"10.1109\/IEEECONF51394.2020.9443368"},{"key":"22_CR2","unstructured":"Castro, P.S., Moitra, S., Gelada, C., Kumar, S., Bellemare, M.G.: Dopamine: a research framework for deep reinforcement learning (2018). http:\/\/arxiv.org\/abs\/1812.06110"},{"key":"22_CR3","unstructured":"De\u00a0Witt, C.S., et al.: Is independent learning all you need in the starcraft multi-agent challenge? arXiv preprint arXiv:2011.09533 (2020)"},{"key":"22_CR4","unstructured":"Frostig, R., Johnson, M.J., Leary, C.: Compiling machine learning programs via high-level tracing. Syst. Mach. Learn. 4(9) (2018)"},{"key":"22_CR5","unstructured":"Hu, J., Jiang, S., Harding, S.A., Wu, H., Wei Liao, S.: Rethinking the Implementation Tricks and Monotonicity Constraint in Cooperative Multi-Agent Reinforcement Learning (2021)"},{"issue":"315","key":"22_CR6","first-page":"1","volume":"24","author":"S Hu","year":"2023","unstructured":"Hu, S., et al.: MARLlib: a scalable and efficient multi-agent reinforcement learning library. J. Mach. Learn. Res. 24(315), 1\u201323 (2023)","journal-title":"J. Mach. Learn. Res."},{"key":"22_CR7","unstructured":"Huang, S., et al.: CleanRL: high-quality single-file implementations of deep reinforcement learning algorithms. J. Mach. Learn. Res. 23(274), 1\u201318 (2022). http:\/\/jmlr.org\/papers\/v23\/21-1342.html"},{"key":"22_CR8","first-page":"20469","volume":"35","author":"J Jiang","year":"2022","unstructured":"Jiang, J., Lu, Z.: I2Q: a fully decentralized Q-learning algorithm. Adv. Neural. Inf. Process. Syst. 35, 20469\u201320481 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"22_CR9","unstructured":"Jiang, J., Su, K., Lu, Z.: Fully decentralized cooperative multi-agent reinforcement learning: a survey. arXiv preprint arXiv:2401.04934 (2024)"},{"key":"22_CR10","unstructured":"Kingma, D.P.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"issue":"316","key":"22_CR11","first-page":"1","volume":"23","author":"T Lan","year":"2022","unstructured":"Lan, T., Srinivasa, S., Wang, H., Zheng, S.: WarpDrive: fast end-to-end deep multi-agent reinforcement learning on a GPU. J. Mach. Learn. Res. 23(316), 1\u20136 (2022)","journal-title":"J. Mach. Learn. Res."},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Li, S., et\u00a0al.: PyTorch distributed: experiences on accelerating data parallel training. Proc. VLDB Endow. 13(12)","DOI":"10.14778\/3415478.3415530"},{"key":"22_CR13","unstructured":"Liang, E., et al.: RLlib: abstractions for distributed reinforcement learning. In: International Conference on Machine Learning, pp. 3053\u20133062. PMLR (2018)"},{"key":"22_CR14","unstructured":"Lillicrap, T.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Littman, M.L.: Markov games as a framework for multi-agent reinforcement learning. In: Machine Learning Proceedings 1994, pp. 157\u2013163. Elsevier (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"22_CR16","unstructured":"Lowe, R., Wu, Y.I., Tamar, A., Harb, J., Pieter\u00a0Abbeel, O., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"22_CR17","doi-asserted-by":"crossref","unstructured":"Meng, Y., Kinsner, M., Singh, D., Iyer, M., Prasanna, V.: PEARL: enabling portable, productive, and high-performance deep reinforcement learning using heterogeneous platforms. In: Proceedings of the 21st ACM International Conference on Computing Frontiers, pp. 41\u201350 (2024)","DOI":"10.1145\/3649153.3649193"},{"key":"22_CR18","unstructured":"Moritz, P., et\u00a0al.: Ray: a distributed framework for emerging $$\\{$$AI$$\\}$$ applications. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 2018), pp. 561\u2013577 (2018)"},{"issue":"9","key":"22_CR19","doi-asserted-by":"publisher","first-page":"3826","DOI":"10.1109\/TCYB.2020.2977374","volume":"50","author":"TT Nguyen","year":"2020","unstructured":"Nguyen, T.T., Nguyen, N.D., Nahavandi, S.: Deep reinforcement learning for multiagent systems: a review of challenges, solutions, and applications. IEEE Trans. Cybernet. 50(9), 3826\u20133839 (2020)","journal-title":"IEEE Trans. Cybernet."},{"key":"22_CR20","unstructured":"Paszke, A., et\u00a0al.: PyTorch: an imperative style, high-performance deep learning library. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"22_CR21","unstructured":"Raffin, A., Hill, A., Gleave, A., Kanervisto, A., Ernestus, M., Dormann, N.: Stable-baselines3: reliable reinforcement learning implementations. J. Mach. Learn. Res. 22(268), 1\u20138 (2021). http:\/\/jmlr.org\/papers\/v22\/20-1364.html"},{"key":"22_CR22","unstructured":"Ruder, S.: An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747 (2016)"},{"key":"22_CR23","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"22_CR24","doi-asserted-by":"crossref","unstructured":"Sharma, P.K., Fernandez, R., Zaroukian, E., Dorothy, M., Basak, A., Asher, D.E.: Survey of recent multi-agent reinforcement learning algorithms utilizing centralized training. In: Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications III, vol. 11746, pp. 665\u2013676. SPIE (2021)","DOI":"10.1117\/12.2585808"},{"key":"22_CR25","unstructured":"Skrynnik, A., Andreychuk, A., Borzilov, A., Chernyavskiy, A., Yakovlev, K., Panov, A.: POGEMA: a benchmark platform for cooperative multi-agent navigation (2024). https:\/\/arxiv.org\/abs\/2407.14931"},{"key":"22_CR26","doi-asserted-by":"crossref","unstructured":"Tan, M.: Multi-agent reinforcement learning: independent vs. cooperative agents. In: Proceedings of the Tenth International Conference on Machine Learning, pp. 330\u2013337 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"22_CR27","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8, 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"22_CR28","unstructured":"Weng, J., et al.: Tianshou: a highly modularized deep reinforcement learning library. J. Mach. Learn. Res. 23(267), 1\u20136 (2022). http:\/\/jmlr.org\/papers\/v23\/21-1127.html"},{"key":"22_CR29","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.15579913","author":"S Wiggins","year":"2025","unstructured":"Wiggins, S., Gupta, N., Zgheib, G., Iyer, M., Prasanna, V.: Accelerating Independent Multi-Agent Reinforcement Learning on Multi-GPU Platforms (2025). https:\/\/doi.org\/10.5281\/zenodo.15579913","journal-title":"Accelerating Independent Multi-Agent Reinforcement Learning on Multi-GPU Platforms"},{"key":"22_CR30","doi-asserted-by":"publisher","unstructured":"Wiggins, S., Meng, Y., Kannan, R., Prasanna, V.: Characterizing speed performance of multi-agent reinforcement learning. In: Proceedings of the 12th International Conference on Data Science, Technology and Applications - DATA, pp. 327\u2013334. INSTICC, SciTePress (2023). https:\/\/doi.org\/10.5220\/0012082200003541","DOI":"10.5220\/0012082200003541"},{"key":"22_CR31","doi-asserted-by":"crossref","unstructured":"Wiggins, S., Prasanna, V.: Accelerating multi-agent DDPG training on multi-GPU platforms. In: 2024 IEEE High Performance Extreme Computing Conference (HPEC), pp.\u00a01\u20135. IEEE (2024)","DOI":"10.1109\/HPEC62836.2024.10938502"},{"key":"22_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., Ba\u015far, T.: Multi-agent reinforcement learning: a selective overview of theories and algorithms. In: Handbook of Reinforcement Learning and Control, pp. 321\u2013384 (2021)","DOI":"10.1007\/978-3-030-60990-0_12"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2025: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-99872-0_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T04:36:08Z","timestamp":1757478968000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-99872-0_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,22]]},"ISBN":["9783031998713","9783031998720"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-99872-0_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,8,22]]},"assertion":[{"value":"22 August 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dresden","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2025.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}