{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T22:17:58Z","timestamp":1757542678174,"version":"3.40.3"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T00:00:00Z","timestamp":1735516800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T00:00:00Z","timestamp":1735516800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Scientific and Technological Innovation 2030","award":["2021ZD0110900"],"award-info":[{"award-number":["2021ZD0110900"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Robot Syst"],"DOI":"10.1007\/s10846-024-02156-6","type":"journal-article","created":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T05:16:22Z","timestamp":1735535782000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Safe Multi-Agent Reinforcement Learning via Approximate Hamilton-Jacobi Reachability"],"prefix":"10.1007","volume":"111","author":[{"given":"Kai","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Fengbo","family":"Lan","sequence":"additional","affiliation":[]},{"given":"Wenbo","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,30]]},"reference":[{"key":"2156_CR1","unstructured":"Gronauer, S., Diepold, K.: Multi-agent deep reinforcement learning: a survey. Artif. Intell. Rev., 1\u201349 (2022)"},{"key":"2156_CR2","unstructured":"Altman, E.: Constrained Markov Decision Processes vol. 7, (1999)"},{"key":"2156_CR3","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1146\/annurev-control-042920-020211","volume":"5","author":"L Brunke","year":"2022","unstructured":"Brunke, L., Greeff, M., Hall, A.W., Yuan, Z., Zhou, S., Panerati, J., Schoellig, A.P.: Safe learning in robotics: From learning-based control to safe reinforcement learning. Ann. Rev. Control Robot. Auton. Syst. 5, 411\u2013444 (2022)","journal-title":"Ann. Rev. Control Robot. Auton. Syst."},{"key":"2156_CR4","doi-asserted-by":"crossref","unstructured":"Alshiekh, M., Bloem, R., Ehlers, R., K\u00f6nighofer, B., Niekum, S., Topcu, U.: Safe reinforcement learning via shielding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11797"},{"key":"2156_CR5","doi-asserted-by":"publisher","unstructured":"Dalal, G., Dvijotham, K., Vecerik, M., Hester, T., Paduraru, C., Tassa, Y.: Safe Exploration in Continuous Action Spaces. 1801\u201308757 (2018) https:\/\/doi.org\/10.48550\/arXiv.1801.08757arXiv:1801.08757 [cs.AI]","DOI":"10.48550\/arXiv.1801.08757"},{"key":"2156_CR6","doi-asserted-by":"publisher","unstructured":"Sheebaelhamd, Z., Zisis, K., Nisioti, A., Gkouletsos, D., Pavllo, D., Kohler, J.: Safe Deep Reinforcement Learning for Multi-Agent Systems with Continuous Action Spaces. 2108\u201303952 (2021) https:\/\/doi.org\/10.48550\/arXiv.2108.03952arXiv:2108.03952 [cs.LG]","DOI":"10.48550\/arXiv.2108.03952"},{"key":"2156_CR7","unstructured":"Lowe, R., WU, Y., Tamar, A., Harb, J., Pieter\u00a0Abbeel, O., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. Adv. Neural Inf. Process. Syst., 30 (2017)"},{"issue":"7","key":"2156_CR8","doi-asserted-by":"publisher","first-page":"947","DOI":"10.1109\/TAC.2005.851439","volume":"50","author":"IM Mitchell","year":"2005","unstructured":"Mitchell, I.M., Bayen, A.M., Tomlin, C.J.: A time-dependent hamilton-jacobi formulation of reachable sets for continuous dynamic games. IEEE Trans. Autom. Control 50(7), 947\u2013957 (2005). https:\/\/doi.org\/10.1109\/TAC.2005.851439","journal-title":"IEEE Trans. Autom. Control"},{"key":"2156_CR9","doi-asserted-by":"publisher","unstructured":"Munos, R., Baird, L.C., Moore, A.W.: Gradient descent approaches to neural-net-based solutions of the hamilton-jacobi-bellman equation. In: IJCNN\u201999. International Joint Conference on Neural Networks. Proceedings (Cat. No.99CH36339), vol. 3, pp. 2152\u201321573 (1999). https:\/\/doi.org\/10.1109\/IJCNN.1999.832721","DOI":"10.1109\/IJCNN.1999.832721"},{"key":"2156_CR10","doi-asserted-by":"publisher","unstructured":"Fisac, J.F., Lugovoy, N.F., Rubies-Royo, V., Ghosh, S., Tomlin, C.J.: Bridging hamilton-jacobi safety analysis and reinforcement learning. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 8550\u20138556 (2019). https:\/\/doi.org\/10.1109\/ICRA.2019.8794107","DOI":"10.1109\/ICRA.2019.8794107"},{"key":"2156_CR11","unstructured":"Sunehag, P., Lever, G., Gruslys, A., Czarnecki, W.M., Zambaldi, V., Jaderberg, M., Lanctot, M., Sonnerat, N., Leibo, J.Z., Tuyls, K., Graepel, T.: Value-decomposition networks for cooperative multi-agent learning based on team reward. In: 18th International Conference on Autonomous Agents and MultiAgent Systems (AAMAS) (AAMAS\u2019 18), pp. 2085\u20132087 (2018)"},{"key":"2156_CR12","unstructured":"Rashid, T., Samvelyan, M., Witt, C.S., Farquhar, G., Foerster, J., Whiteson, S.: Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning. In: 35th International Conference on Machine Learning (ICML). Proceedings of Machine Learning Research, vol. 80 (2018)"},{"key":"2156_CR13","unstructured":"Son, K., Kim, D., Kang, W.J., Hostallero, D.E., Yi, Y.: Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: International Conference on Machine Learning, pp. 5887\u20135896 (2019). PMLR"},{"key":"2156_CR14","doi-asserted-by":"crossref","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"2156_CR15","unstructured":"Yang, J., Nakhaei, A., Isele, D., Fujimura, K., Zha, H.: Cm3: Cooperative multi-goal multi-stage multi-agent reinforcement learning. In: International Conference on Learning Representations (2020)"},{"key":"2156_CR16","first-page":"24611","volume":"35","author":"C Yu","year":"2022","unstructured":"Yu, C., Velu, A., Vinitsky, E., Gao, J., Wang, Y., Bayen, A., Wu, Y.: The surprising effectiveness of ppo in cooperative multi-agent games. Adv. Neural Inf. Process. Syst. 35, 24611\u201324624 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"2156_CR17","unstructured":"Kuba, J., Chen, R., Wen, M., Wen, Y., Sun, F., Wang, J., Yang, Y.: Trust region policy optimisation in multi-agent reinforcement learning. In: ICLR 2022-10th International Conference on Learning Representations, pp. 1046 (2022). The International Conference on Learning Representations (ICLR)"},{"issue":"4","key":"2156_CR18","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1007\/s10846-023-01917-z","volume":"108","author":"E Amhraoui","year":"2023","unstructured":"Amhraoui, E., Masrour, T.: Smooth q-learning: An algorithm for independent learners in stochastic cooperative markov games. J. Intell. Robot. Syst. 108(4), 65 (2023)","journal-title":"J. Intell. Robot. Syst."},{"key":"2156_CR19","first-page":"16509","volume":"35","author":"M Wen","year":"2022","unstructured":"Wen, M., Kuba, J., Lin, R., Zhang, W., Wen, Y., Wang, J., Yang, Y.: Multi-agent reinforcement learning is a sequence modeling problem. Adv. Neural Inf. Process. Syst. 35, 16509\u201316521 (2022)","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"1","key":"2156_CR20","first-page":"1437","volume":"16","author":"J Garc\u0131a","year":"2015","unstructured":"Garc\u0131a, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16(1), 1437\u20131480 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"2156_CR21","unstructured":"Gu, S., Yang, L., Du, Y., Chen, G., Walter, F., Wang, J., Yang, Y., Knoll, A.: A review of safe reinforcement learning: Methods, theory and applications. arXiv:2205.10330 (2022)"},{"issue":"167","key":"2156_CR22","first-page":"1","volume":"18","author":"Y Chow","year":"2018","unstructured":"Chow, Y., Ghavamzadeh, M., Janson, L., Pavone, M.: Risk-constrained reinforcement learning with percentile risk criteria. J. Mach. Learn. Res. 18(167), 1\u201351 (2018)","journal-title":"J. Mach. Learn. Res."},{"key":"2156_CR23","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P.: Constrained policy optimization. In: International Conference on Machine Learning, pp. 22\u201331 (2017). PMLR"},{"key":"2156_CR24","unstructured":"Yang, T.-Y., Rosca, J., Narasimhan, K., Ramadge, P.J.: Projection-based constrained policy optimization. In: International Conference on Learning Representations (2019)"},{"key":"2156_CR25","unstructured":"T.-Y. Yang, J. Rosca, K. Narasimhan, and P. J. Ramadge: Accelerating safe reinforcement learning with constraint-mismatched baseline policies. In: International Conference on Machine Learning, pp. 11795\u201311807 (2021). PMLR"},{"key":"2156_CR26","doi-asserted-by":"publisher","first-page":"103905","DOI":"10.1016\/j.artint.2023.103905","volume":"319","author":"S Gu","year":"2023","unstructured":"Gu, S., Kuba, J.G., Chen, Y., Du, Y., Yang, L., Knoll, A., Yang, Y.: Safe multi-agent reinforcement learning for multi-robot control. Artif. Intell. 319, 103905 (2023)","journal-title":"Artif. Intell."},{"key":"2156_CR27","unstructured":"Ziyan, W., Yali, D., Aivar, S., Haitham\u00a0Bou, A., Jun, W.: Cama : A new framework for safe multi-agent reinforcement learning using constraint augmentation. (2023)"},{"key":"2156_CR28","unstructured":"Sootla, A., Cowen-Rivers, A.I., Jafferjee, T., Wang, Z., Mguni, D.H., Wang, J., Ammar, H.: Saut\u00e9 rl: Almost surely safe reinforcement learning using state augmentation. In: International Conference on Machine Learning, pp. 20423\u201320443 (2022). PMLR"},{"key":"2156_CR29","doi-asserted-by":"crossref","unstructured":"Zhao, W., He, T., Chen, R., Wei, T., Liu, C.: State-wise safe reinforcement learning: A survey. arXiv:2302.03122. (2023)","DOI":"10.24963\/ijcai.2023\/763"},{"key":"2156_CR30","unstructured":"ElSayed-Aly, I., Bharadwaj, S., Amato, C., Ehlers, R., Topcu, U., Feng, L.: Safe multi-agent reinforcement learning via shielding, 483\u2013491 (2021)"},{"key":"2156_CR31","doi-asserted-by":"crossref","unstructured":"Li, S., Wu, Y., Cui, X., Dong, H., Fang, F., Russell, S.: Robust multi-agent reinforcement learning via minimax deep deterministic policy gradient. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 4213\u20134220 (2019)","DOI":"10.1609\/aaai.v33i01.33014213"},{"key":"2156_CR32","doi-asserted-by":"crossref","unstructured":"Bansal, S., Chen, M., Herbert, S., Tomlin, C.J.: Hamilton-jacobi reachability: A brief overview and recent advances. In: 2017 IEEE 56th Annual Conference on Decision and Control (CDC), pp. 2242\u20132253 (2017). IEEE","DOI":"10.1109\/CDC.2017.8263977"},{"key":"2156_CR33","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1146\/annurev-control-060117-104941","volume":"1","author":"M Chen","year":"2018","unstructured":"Chen, M., Tomlin, C.J.: Hamilton-jacobi reachability: Some recent theoretical advances and applications in unmanned airspace management. Ann. Rev. Control Robot. Auton. Syst. 1, 333\u2013358 (2018)","journal-title":"Ann. Rev. Control Robot. Auton. Syst."},{"issue":"2","key":"2156_CR34","doi-asserted-by":"publisher","first-page":"3663","DOI":"10.1109\/LRA.2021.3063989","volume":"6","author":"YS Shao","year":"2021","unstructured":"Shao, Y.S., Chen, C., Kousik, S., Vasudevan, R.: Reachability-based trajectory safeguard (rts): A safe and fast reinforcement learning safety layer for continuous control. IEEE Robot. Autom. Lett. 6(2), 3663\u20133670 (2021)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"2156_CR35","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1109\/OJCSYS.2023.3256305","volume":"2","author":"N Kochdumper","year":"2023","unstructured":"Kochdumper, N., Krasowski, H., Wang, X., Bak, S., Althoff, M.: Provably safe reinforcement learning via action projection using reachability analysis and polynomial zonotopes. IEEE Open J. Control Syst. 2, 79\u201392 (2023)","journal-title":"IEEE Open J. Control Syst."},{"issue":"4","key":"2156_CR36","doi-asserted-by":"publisher","first-page":"10665","DOI":"10.1109\/LRA.2022.3192205","volume":"7","author":"M Selim","year":"2022","unstructured":"Selim, M., Alanwar, A., Kousik, S., Gao, G., Pavone, M., Johansson, K.H.: Safe reinforcement learning using black-box reachability analysis. IEEE Robot. Autom. Lett. 7(4), 10665\u201310672 (2022)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"2156_CR37","doi-asserted-by":"publisher","unstructured":"Hsu, K.-C., Rubies-Royo, V., Tomlin, C.J., Fisac, J.F.: Safety and liveness guarantees through reach-avoid reinforcement learning. In: Proceedings of Robotics: Science and Systems, Held Virtually (2021). https:\/\/doi.org\/10.15607\/RSS.2021.XVII.077","DOI":"10.15607\/RSS.2021.XVII.077"},{"key":"2156_CR38","unstructured":"Yu, D., Ma, H., Li, S., Chen, J.: Reachability constrained reinforcement learning. In: International Conference on Machine Learning, pp. 25636\u201325655 (2022). PMLR"},{"key":"2156_CR39","unstructured":"Ganai, M., Gong, Z., Yu, C., Herbert, S., Gao, S.: Iterative reachability estimation for safe reinforcement learning. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"2156_CR40","doi-asserted-by":"crossref","unstructured":"Bardi, M., Falcone, M., Soravia, P.: Numerical methods for pursuit-evasion games via viscosity solutions. In: Stochastic and Differential Games: Theory and Numerical Methods, pp. 105\u2013175 (1999)","DOI":"10.1007\/978-1-4612-1592-9_3"},{"key":"2156_CR41","doi-asserted-by":"crossref","unstructured":"Munos, R., Baird, L.C., Moore, A.W.: Gradient descent approaches to neural-net-based solutions of the hamilton-jacobi-bellman equation. In: IJCNN\u201999. International Joint Conference on Neural Networks. Proceedings (Cat. No. 99CH36339), vol. 3, pp. 2152\u20132157 (1999). IEEE","DOI":"10.1109\/IJCNN.1999.832721"},{"key":"2156_CR42","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative q-learning for offline reinforcement learning. Adv. Neural Inf. Process. Systems. 33, 1179\u20131191 (2020)","journal-title":"Adv. Neural Inf. Process. Systems."},{"key":"2156_CR43","unstructured":"Bharadhwaj, H., Kumar, A., Rhinehart, N., Levine, S., Shkurti, F., Garg, A.: Conservative safety critics for exploration. In: International Conference on Learning Representations (2021)"},{"issue":"2","key":"2156_CR44","first-page":"229","volume":"17","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. Robotica. 17(2), 229\u2013235 (1999)","journal-title":"Robotica."},{"key":"2156_CR45","doi-asserted-by":"crossref","unstructured":"Mordatch, I., Abbeel, P.: Emergence of grounded compositional language in multi-agent populations. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11492"}],"container-title":["Journal of Intelligent &amp; Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-024-02156-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10846-024-02156-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-024-02156-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T21:02:23Z","timestamp":1743886943000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10846-024-02156-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,30]]},"references-count":45,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,3]]}},"alternative-id":["2156"],"URL":"https:\/\/doi.org\/10.1007\/s10846-024-02156-6","relation":{},"ISSN":["1573-0409"],"issn-type":[{"type":"electronic","value":"1573-0409"}],"subject":[],"published":{"date-parts":[[2024,12,30]]},"assertion":[{"value":"1 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflicts of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}},{"value":"All authors of this research paper have consented to participate in the research study.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"All authors of this research paper have read and approved the submitted version.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"7"}}