{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T15:28:00Z","timestamp":1777994880565,"version":"3.51.4"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2022ZD0119903"],"award-info":[{"award-number":["2022ZD0119903"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U2141234"],"award-info":[{"award-number":["U2141234"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Science and Technology Program","award":["19510745200"],"award-info":[{"award-number":["19510745200"]}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","award":["202106230194"],"award-info":[{"award-number":["202106230194"]}],"id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]},{"name":"SURF Cooperative through the Dutch National e-Infrastructure","award":["EINF-2851"],"award-info":[{"award-number":["EINF-2851"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1109\/tnnls.2023.3326867","type":"journal-article","created":{"date-parts":[[2023,11,2]],"date-time":"2023-11-02T13:59:27Z","timestamp":1698933567000},"page":"1939-1946","source":"Crossref","is-referenced-by-count":16,"title":["Safe Adaptive Policy Transfer Reinforcement Learning for Distributed Multiagent Control"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5308-3546","authenticated-orcid":false,"given":"Bin","family":"Du","sequence":"first","affiliation":[{"name":"Ocean Institute, Northwestern Polytechnical University, Taicang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4984-6659","authenticated-orcid":false,"given":"Wei","family":"Xie","sequence":"additional","affiliation":[{"name":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3109-0953","authenticated-orcid":false,"given":"Yang","family":"Li","sequence":"additional","affiliation":[{"name":"College of Mechanical and Vehicle Engineering, Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9686-2697","authenticated-orcid":false,"given":"Qisong","family":"Yang","sequence":"additional","affiliation":[{"name":"Xi&#x2019;an Institute of High-Tech, Xi&#x2019;an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4700-1276","authenticated-orcid":false,"given":"Weidong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9784-1225","authenticated-orcid":false,"given":"Rudy R.","family":"Negenborn","sequence":"additional","affiliation":[{"name":"Department of Maritime and Transport Technology, Delft University of Technology, Delft, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8094-3436","authenticated-orcid":false,"given":"Yusong","family":"Pang","sequence":"additional","affiliation":[{"name":"Department of Maritime and Transport Technology, Delft University of Technology, Delft, The Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8600-9668","authenticated-orcid":false,"given":"Hongtian","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Automation, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3094901"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74958-5_70"},{"key":"ref5","article-title":"Policy distillation","author":"Rusu","year":"2015","journal-title":"arXiv:1511.06295"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1503.02531"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3142822"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3105407"},{"key":"ref9","first-page":"10141","article-title":"REPAINT: Knowledge transfer in deep reinforcement learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","author":"Tao"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3108237"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3106705"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3203977"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3084685"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2967871"},{"key":"ref15","first-page":"9797","article-title":"Safe reinforcement learning in constrained Markov decision processes","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wachi"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2976787"},{"key":"ref17","first-page":"22","article-title":"Constrained policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Achiam"},{"key":"ref18","first-page":"9133","article-title":"Responsive safety in reinforcement learning by PID Lagrangian methods","volume-title":"Proc. 37th Int. Conf. Mach. Learn. (ICML)","author":"Stooke"},{"key":"ref19","volume-title":"Constrained Optimization and Lagrange Multiplier Methods","author":"Bertsekas","year":"2014"},{"key":"ref20","first-page":"13644","article-title":"Constrained variational policy optimization for safe reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Liu"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-022-06187-8"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161256"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17062"},{"key":"ref24","first-page":"883","article-title":"A constrained multi-objective reinforcement learning framework","volume-title":"Proc. 5th Conf. Robot Learn.","author":"Huang"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3192418"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1126\/science.153.3731.34"},{"key":"ref27","first-page":"5916","article-title":"Revisiting the softmax Bellman operator: New benefits and new perspective","volume-title":"Proc. 36th Int. Conf. Mach. Learn. (ICML)","author":"Song"},{"key":"ref28","first-page":"1","article-title":"Transfer learning-motivated intelligent fault diagnosis designs: A survey, insights, and perspectives","author":"Chen","year":"2022","journal-title":"TechRxiv"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref30","first-page":"1554","article-title":"Safe driving via expert guided policy optimization","volume-title":"Proc. 5th Conf. Robot Learn.","author":"Peng"},{"key":"ref31","first-page":"17037","article-title":"An efficient transfer learning framework for multiagent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Yang"},{"key":"ref32","first-page":"1","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Lowe"},{"key":"ref33","article-title":"Benchmarking safe exploration in deep reinforcement learning","author":"Ray","year":"2019","journal-title":"arXiv:1910.01708"},{"key":"ref34","article-title":"Lyapunov-based safe policy optimization for continuous control","author":"Chow","year":"2019","journal-title":"arXiv:1901.10031"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10832116\/10305216.pdf?arnumber=10305216","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:39:09Z","timestamp":1764959949000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10305216\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1]]},"references-count":34,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2023.3326867","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1]]}}}