{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T13:06:41Z","timestamp":1775912801815,"version":"3.50.1"},"reference-count":53,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100007601","name":"Horizon 2020","doi-asserted-by":"publisher","award":["945539"],"award-info":[{"award-number":["945539"]}],"id":[{"id":"10.13039\/501100007601","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100009592","name":"Beijing Municipal Science and Technology Commission","doi-asserted-by":"publisher","award":["Z221100003422004"],"award-info":[{"award-number":["Z221100003422004"]}],"id":[{"id":"10.13039\/501100009592","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Artificial Intelligence"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1016\/j.artint.2023.103905","type":"journal-article","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T12:46:07Z","timestamp":1678970767000},"page":"103905","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":110,"special_numbering":"C","title":["Safe multi-agent reinforcement learning for multi-robot control"],"prefix":"10.1016","volume":"319","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2722-3779","authenticated-orcid":false,"given":"Shangding","family":"Gu","sequence":"first","affiliation":[]},{"given":"Jakub","family":"Grudzien Kuba","sequence":"additional","affiliation":[]},{"given":"Yuanpei","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yali","family":"Du","sequence":"additional","affiliation":[]},{"given":"Long","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Alois","family":"Knoll","sequence":"additional","affiliation":[]},{"given":"Yaodong","family":"Yang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.artint.2023.103905_br0010","series-title":"Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","first-page":"75","article-title":"Optimizing debt collections using constrained reinforcement learning","author":"Abe","year":"2010"},{"key":"10.1016\/j.artint.2023.103905_br0020","series-title":"International Conference on Machine Learning","first-page":"22","article-title":"Constrained policy optimization","author":"Achiam","year":"2017"},{"issue":"31","key":"10.1016\/j.artint.2023.103905_br0030","doi-asserted-by":"crossref","DOI":"10.1126\/scirobotics.aaw1924","article-title":"Effortless creation of safe robots from modules through self-programming and self-verification","volume":"4","author":"Althoff","year":"2019","journal-title":"Sci. Robot."},{"key":"10.1016\/j.artint.2023.103905_br0040","series-title":"Constrained Markov Decision Processes, vol. 7","author":"Altman","year":"1999"},{"issue":"8","key":"10.1016\/j.artint.2023.103905_br0050","doi-asserted-by":"crossref","first-page":"3861","DOI":"10.1109\/TAC.2016.2638961","article-title":"Control barrier function based quadratic programs for safety critical systems","volume":"62","author":"Ames","year":"2016","journal-title":"IEEE Trans. Autom. Control"},{"key":"10.1016\/j.artint.2023.103905_br0060","doi-asserted-by":"crossref","first-page":"2090","DOI":"10.1109\/LCSYS.2021.3138546","article-title":"Online learning-based trajectory tracking for underactuated vehicles with uncertain dynamics","volume":"6","author":"Beckers","year":"2021","journal-title":"IEEE Control Syst. Lett."},{"issue":"27","key":"10.1016\/j.artint.2023.103905_br0070","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1016\/j.ifacol.2015.11.154","article-title":"Control barrier certificates for safe swarm behavior","volume":"48","author":"Borrmann","year":"2015","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.artint.2023.103905_br0080","series-title":"Openai gym","author":"Brockman","year":"2016"},{"key":"10.1016\/j.artint.2023.103905_br0090","article-title":"Safe learning in robotics: from learning-based control to safe reinforcement learning","volume":"5","author":"Brunke","year":"2021","journal-title":"Annu. Rev. Control Robotics Auton. Syst."},{"key":"10.1016\/j.artint.2023.103905_br0100","series-title":"2021 40th Chinese Control Conference (CCC)","first-page":"879","article-title":"Multi-robot formation control and implementation","author":"Chen","year":"2021"},{"key":"10.1016\/j.artint.2023.103905_br0110","series-title":"Thirty-Second AAAI Conference on Artificial Intelligence","article-title":"Cellular network traffic scheduling with deep reinforcement learning","author":"Chinchali","year":"2018"},{"issue":"1","key":"10.1016\/j.artint.2023.103905_br0120","first-page":"6070","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.artint.2023.103905_br0130","article-title":"A Lyapunov-based approach to safe reinforcement learning","volume":"31","author":"Chow","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2023.103905_br0140","author":"Chow"},{"issue":"3","key":"10.1016\/j.artint.2023.103905_br0150","doi-asserted-by":"crossref","first-page":"1086","DOI":"10.1109\/TITS.2019.2901791","article-title":"Multi-agent deep reinforcement learning for large-scale traffic signal control","volume":"21","author":"Chu","year":"2019","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"3","key":"10.1016\/j.artint.2023.103905_br0160","doi-asserted-by":"crossref","first-page":"253","DOI":"10.1016\/j.mechmachtheory.2007.03.003","article-title":"An atlas of physical human\u2013robot interaction","volume":"43","author":"De Santis","year":"2008","journal-title":"Mech. Mach. Theory"},{"key":"10.1016\/j.artint.2023.103905_br0170","author":"Schroeder de Witt"},{"issue":"10\u201311","key":"10.1016\/j.artint.2023.103905_br0180","doi-asserted-by":"crossref","first-page":"977","DOI":"10.1177\/0278364902021010981","article-title":"A framework and architecture for multi-robot coordination","volume":"21","author":"Fierro","year":"2002","journal-title":"Int. J. Robot. Res."},{"issue":"1","key":"10.1016\/j.artint.2023.103905_br0190","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"Garc\u0131a","year":"2015","journal-title":"J. Mach. Learn. Res."},{"issue":"4","key":"10.1016\/j.artint.2023.103905_br0200","doi-asserted-by":"crossref","first-page":"81","DOI":"10.3390\/robotics11040081","article-title":"Constrained reinforcement learning for vehicle motion planning with topological reachability analysis","volume":"11","author":"Gu","year":"2022","journal-title":"Robotics"},{"key":"10.1016\/j.artint.2023.103905_br0210","author":"Gu"},{"key":"10.1016\/j.artint.2023.103905_br0220","author":"Gu"},{"key":"10.1016\/j.artint.2023.103905_br0230","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1016\/j.ins.2012.07.014","article-title":"From model-based control to data-driven control: survey, classification and perspective","volume":"235","author":"Hou","year":"2013","journal-title":"Inf. Sci."},{"key":"10.1016\/j.artint.2023.103905_br0240","series-title":"International Conference on Learning Representations","article-title":"Trust region policy optimisation in multi-agent reinforcement learning","author":"Grudzien Kuba","year":"2021"},{"key":"10.1016\/j.artint.2023.103905_br0250","first-page":"13458","article-title":"Settling the variance of multi-agent policy gradients","volume":"34","author":"Grudzien Kuba","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2023.103905_br0260","series-title":"Bandit Algorithms","author":"Lattimore","year":"2020"},{"key":"10.1016\/j.artint.2023.103905_br0270","unstructured":"Timothy Paul Lillicrap, Jonathan James Hunt, Alexander Pritzel, Nicolas Manfred Otto Heess, Tom Erez, Yuval Tassa, David Silver, Daniel Pieter Wierstra, Continuous control with deep reinforcement learning, September 15 2020. US Patent 10,776,692."},{"key":"10.1016\/j.artint.2023.103905_br0280","series-title":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","first-page":"157","article-title":"Cmix: deep multi-agent reinforcement learning with peak and average constraints","author":"Liu","year":"2021"},{"key":"10.1016\/j.artint.2023.103905_br0290","first-page":"8767","article-title":"Decentralized policy gradient descent ascent for safe multi-agent reinforcement learning","volume":"vol. 35","author":"Lu","year":"2021"},{"key":"10.1016\/j.artint.2023.103905_br0300","series-title":"2021 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"6064","article-title":"Reinforcement learning for autonomous driving with latent state inference and spatial-temporal relationships","author":"Ma","year":"2021"},{"key":"10.1016\/j.artint.2023.103905_br0310","series-title":"Thirty-Fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)","article-title":"Isaac gym: high performance gpu based physics simulation for robot learning","author":"Makoviychuk","year":"2021"},{"key":"10.1016\/j.artint.2023.103905_br0320","series-title":"Proceedings of the 29th International Conference on International Conference on Machine Learning","first-page":"1451","article-title":"Safe exploration in Markov decision processes","author":"Moldovan","year":"2012"},{"key":"10.1016\/j.artint.2023.103905_br0330","author":"NVIDIA"},{"issue":"3","key":"10.1016\/j.artint.2023.103905_br0340","doi-asserted-by":"crossref","first-page":"617","DOI":"10.1109\/TAC.2015.2444131","article-title":"Distributed coordination control for multi-robot networks using Lyapunov-like barrier functions","volume":"61","author":"Panagou","year":"2015","journal-title":"IEEE Trans. Autom. Control"},{"key":"10.1016\/j.artint.2023.103905_br0350","first-page":"12208","article-title":"Factored multi-agent centralised policy gradients","volume":"34","author":"Peng","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2023.103905_br0360","author":"Pollard"},{"key":"10.1016\/j.artint.2023.103905_br0370","series-title":"International Conference on Learning Representations","article-title":"Learning safe multi-agent control with decentralized neural barrier certificates","author":"Qin","year":"2020"},{"key":"10.1016\/j.artint.2023.103905_br0380","series-title":"International Conference on Machine Learning","first-page":"4295","article-title":"Qmix: monotonic value function factorisation for deep multi-agent reinforcement learning","author":"Rashid","year":"2018"},{"key":"10.1016\/j.artint.2023.103905_br0390","author":"Ray"},{"key":"10.1016\/j.artint.2023.103905_br0400","series-title":"Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems","first-page":"2186","article-title":"The starcraft multi-agent challenge","author":"Samvelyan","year":"2019"},{"key":"10.1016\/j.artint.2023.103905_br0410","series-title":"International Conference on Machine Learning","first-page":"1889","article-title":"Trust region policy optimization","author":"Schulman","year":"2015"},{"key":"10.1016\/j.artint.2023.103905_br0420","author":"Schulman"},{"key":"10.1016\/j.artint.2023.103905_br0430","author":"Schulman"},{"key":"10.1016\/j.artint.2023.103905_br0440","author":"Shalev-Shwartz"},{"issue":"7587","key":"10.1016\/j.artint.2023.103905_br0450","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"Silver","year":"2016","journal-title":"Nature"},{"key":"10.1016\/j.artint.2023.103905_br0460","series-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.artint.2023.103905_br0470","series-title":"Advances in Neural Information Processing Systems (NeurIPS)","article-title":"Constrained update projection approach to safe policy optimization","author":"Yang","year":"2022"},{"key":"10.1016\/j.artint.2023.103905_br0480","first-page":"8823","article-title":"Policy optimization with stochastic mirror descent","volume":"vol. 36","author":"Yang","year":"2022"},{"key":"10.1016\/j.artint.2023.103905_br0490","author":"Yu"},{"key":"10.1016\/j.artint.2023.103905_br0500","series-title":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","first-page":"3512","article-title":"Safe continuous control with constrained model-based policy optimization","author":"Zanger","year":"2021"},{"key":"10.1016\/j.artint.2023.103905_br0510","author":"Zhang"},{"key":"10.1016\/j.artint.2023.103905_br0520","first-page":"750","article-title":"Dear: deep reinforcement learning for online advertising impression in recommender systems","volume":"vol. 35","author":"Zhao","year":"2021"},{"key":"10.1016\/j.artint.2023.103905_br0530","author":"Zhu"}],"container-title":["Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370223000516?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370223000516?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,9,13]],"date-time":"2025-09-13T07:25:56Z","timestamp":1757748356000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0004370223000516"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":53,"alternative-id":["S0004370223000516"],"URL":"https:\/\/doi.org\/10.1016\/j.artint.2023.103905","relation":{},"ISSN":["0004-3702"],"issn-type":[{"value":"0004-3702","type":"print"}],"subject":[],"published":{"date-parts":[[2023,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Safe multi-agent reinforcement learning for multi-robot control","name":"articletitle","label":"Article Title"},{"value":"Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.artint.2023.103905","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"Crown Copyright \u00a9 2023 Published by Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"103905"}}