{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:28:22Z","timestamp":1766068102359,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031711510"},{"type":"electronic","value":"9783031711527"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-71152-7_4","type":"book-chapter","created":{"date-parts":[[2024,10,24]],"date-time":"2024-10-24T06:03:19Z","timestamp":1729749799000},"page":"64-81","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["SADMA: Scalable Asynchronous Distributed Multi-agent Reinforcement Learning Training Framework"],"prefix":"10.1007","author":[{"given":"Sizhe","family":"Wang","sequence":"first","affiliation":[]},{"given":"Long","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Cairun","family":"Yi","sequence":"additional","affiliation":[]},{"given":"Fan","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Qian","family":"Kou","sequence":"additional","affiliation":[]},{"given":"Mingyang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xingyug","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Xuguang","family":"Lan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,16]]},"reference":[{"issue":"2","key":"4_CR1","first-page":"687","volume":"16","author":"MF Ahmad","year":"2024","unstructured":"Ahmad, M.F.: Public opinion and persuasion of algorithmic fairness: assessment of communication protocol performance for use in simulation-based reinforcement learning training. Int. J. Inf. Technol. 16(2), 687\u2013696 (2024)","journal-title":"Int. J. Inf. Technol."},{"key":"4_CR2","unstructured":"Babaeizadeh, M., Frosio, I., Tyree, S., Clemons, J., Kautz, J.: Reinforcement learning through asynchronous advantage actor-critic on a gpu. arXiv preprint arXiv:1611.06256 (2016)"},{"key":"4_CR3","unstructured":"Berner, C., et\u00a0al.: Dota 2 with large scale deep reinforcement learning. arXiv preprint arXiv:1912.06680 (2019)"},{"issue":"1","key":"4_CR4","doi-asserted-by":"publisher","first-page":"427","DOI":"10.1109\/TII.2012.2219061","volume":"9","author":"Y Cao","year":"2012","unstructured":"Cao, Y., Yu, W., Ren, W., Chen, G.: An overview of recent progress in the study of distributed multi-agent coordination. IEEE Trans. Ind. Inf. 9(1), 427\u2013438 (2012)","journal-title":"IEEE Trans. Ind. Inf."},{"key":"4_CR5","unstructured":"Espeholt, L., Marinier, R., Stanczyk, P., Wang, K., Michalski, M.: Seed rl: scalable and efficient deep-rl with accelerated central inference. arXiv preprint arXiv:1910.06591 (2019)"},{"key":"4_CR6","unstructured":"Espeholt, L., et\u00a0al.: Impala: scalable distributed deep-rl with importance weighted actor-learner architectures. In: International Conference on Machine Learning, pp. 1407\u20131416. PMLR (2018)"},{"key":"4_CR7","unstructured":"Hintjens, P.: ZeroMQ: Messaging for Many Applications. O\u2019Reilly Media, Inc., Newton (2013)"},{"key":"4_CR8","unstructured":"Hu, J., Jiang, S., Harding, S.A., Wu, H., Liao, S.W.: Rethinking the implementation tricks and monotonicity constraint in cooperative multi-agent reinforcement learning. arXiv preprint arXiv:2102.03479 (2021)"},{"key":"4_CR9","unstructured":"Hu, S., et al.: Marllib: extending rllib for multi-agent reinforcement learning. arXiv preprint arXiv:2210.13708 (2022)"},{"key":"4_CR10","unstructured":"Huh, D., Mohapatra, P.: Multi-agent reinforcement learning: a comprehensive survey. arXiv preprint arXiv:2312.10256 (2023)"},{"key":"4_CR11","unstructured":"Konda, V., Tsitsiklis, J.: Actor-critic algorithms. Adv. Neural Inf. Process. Syst. 12 (1999)"},{"key":"4_CR12","unstructured":"Liang, E., et al.: Rllib: abstractions for distributed reinforcement learning. In: International Conference on Machine Learning, pp. 3053\u20133062. PMLR (2018)"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Luksa, M.: Kubernetes in action. Simon and Schuster (2017)","DOI":"10.3139\/9783446456020.fm"},{"key":"4_CR14","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937. PMLR (2016)"},{"key":"4_CR15","unstructured":"Moritz, P., et\u00a0al.: Ray: A distributed framework for emerging $$\\{$$AI$$\\}$$ applications. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 2018), pp. 561\u2013577 (2018)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 400\u2013407 (1951)","DOI":"10.1214\/aoms\/1177729586"},{"key":"4_CR17","unstructured":"Samvelyan, M., et al.: The starcraft multi-agent challenge. arXiv preprint arXiv:1902.04043 (2019)"},{"key":"4_CR18","first-page":"20","volume":"2","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., et al.: Alphastar: mastering the real-time strategy game starcraft ii. DeepMind Blog 2, 20 (2019)","journal-title":"DeepMind Blog"},{"key":"4_CR19","unstructured":"de\u00a0Witt, C.S., et al.: Is independent learning all you need in the starcraft multi-agent challenge? arXiv preprint arXiv:2011.09533 (2020)"},{"key":"4_CR20","unstructured":"Yang, X., et al.: A versatile multi-agent reinforcement learning benchmark for inventory management. arXiv preprint arXiv:2306.07542 (2023)"},{"issue":"5","key":"4_CR21","doi-asserted-by":"publisher","first-page":"10026","DOI":"10.3390\/s150510026","volume":"15","author":"D Ye","year":"2015","unstructured":"Ye, D., Zhang, M., Yang, Y.: A multi-agent framework for packet routing in wireless sensor networks. Sensors 15(5), 10026\u201310047 (2015)","journal-title":"Sensors"},{"key":"4_CR22","unstructured":"Yuan, L., Zhang, Z., Li, L., Guan, C., Yu, Y.: A survey of progress on cooperative multi-agent reinforcement learning in open environment. arXiv preprint arXiv:2312.01058 (2023)"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: Cityflow: a multi-agent reinforcement learning environment for large scale city traffic scenario. In: The World Wide Web Conference, pp. 3620\u20133624 (2019)","DOI":"10.1145\/3308558.3314139"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., Ba\u015far, T.: Multi-agent reinforcement learning: a selective overview of theories and algorithms. In: Handbook of Reinforcement Learning and Control, pp. 321\u2013384 (2021)","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"4_CR25","first-page":"150","volume":"24","author":"M Zhou","year":"2023","unstructured":"Zhou, M., et al.: Malib: a parallel framework for population-based multi-agent reinforcement learning. J. Mach. Learn. Res. 24, 150\u20131 (2023)","journal-title":"J. Mach. Learn. Res."}],"container-title":["Lecture Notes in Computer Science","Engineering Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-71152-7_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,24]],"date-time":"2024-10-24T06:04:16Z","timestamp":1729749856000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-71152-7_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031711510","9783031711527"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-71152-7_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"16 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EMAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Engineering Multi-Agent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Auckland","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 May 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 May 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"emas2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/emas.in.tu-clausthal.de\/2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}