{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T06:44:08Z","timestamp":1777358648025,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":9,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,12]]},"DOI":"10.1145\/3789418.3789420","type":"proceedings-article","created":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T08:42:29Z","timestamp":1777106549000},"page":"9-16","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Beyond Rewards: Extended Evaluation Metrics for Safe Reinforcement Learning with PPO-Lagrangian"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-9528-3247","authenticated-orcid":false,"given":"Haoran","family":"Lin","sequence":"first","affiliation":[{"name":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, WI, USA,"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6229-5115","authenticated-orcid":false,"given":"Chengyun","family":"Chi","sequence":"additional","affiliation":[{"name":"Department of Computer Sciences, Shanghai Jiao Tong University, Shanghai, Shanghai, China,"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1477-5021","authenticated-orcid":false,"given":"Beining","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Sciences, Tongji University, Shanghai, Shanghai, China,"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,4,25]]},"reference":[{"key":"e_1_3_3_1_1_2","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886795"},{"key":"e_1_3_3_1_2_2","first-page":"31","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Achiam J.","year":"2017","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P. (2017) Constrained policy optimization. Proc. Int. Conf. Mach. Learn. (ICML), pp. 22\u201331."},{"key":"e_1_3_3_1_3_2","first-page":"18964","article-title":"Safety gymnasium: A unified safe reinforcement learning benchmark","volume":"36","author":"Ji J.","year":"2023","unstructured":"Ji, J., Zhang, B., Zhou, J., Pan, X., Huang, W., Sun, R., et al. (2023) Safety gymnasium: A unified safe reinforcement learning benchmark. Adv. Neural Inf. Process. Syst., 36: 18964\u201318993.","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"e_1_3_3_1_4_2","volume-title":"Proc. Int. Conf. Mach. Learn., 119: 9133\u20139143","author":"Stooke A.","year":"2020","unstructured":"Stooke, A., Achiam, J., Abbeel, P. (2020) Responsive safety in reinforcement learning by PID Lagrangian methods. Proc. Int. Conf. Mach. Learn., 119: 9133\u20139143."},{"issue":"12","key":"e_1_3_3_1_5_2","first-page":"10639","article-title":"WCSAC: Worst-case soft actor critic for safety-constrained reinforcement learning","volume":"35","author":"Yang Q.","year":"2021","unstructured":"Yang, Q., Sim\u00e3o, T.D., Tindemans, S.H., Spaan, M.T. (2021) WCSAC: Worst-case soft actor critic for safety-constrained reinforcement learning. Proc. AAAI Conf. Artif. Intell., 35(12): 10639\u201310646.","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3070252"},{"key":"e_1_3_3_1_7_2","volume-title":"Measuring the reliability of reinforcement learning algorithms. arXiv preprint arXiv:1912.05663","author":"Chan S.C.","year":"2019","unstructured":"Chan, S.C., Fishman, S., Canny, J., Korattikara, A., and Guadarrama, S. (2019) Measuring the reliability of reinforcement learning algorithms. arXiv preprint arXiv:1912.05663."},{"key":"e_1_3_3_1_8_2","volume-title":"Datasets and benchmarks for offline safe reinforcement learning. arXiv preprint arXiv:2306.09303","author":"Liu Z.","year":"2023","unstructured":"Liu, Z., Guo, Z., Lin, H., Yao, Y., Zhu, J., Cen, Z., and Zhao, D. (2023) Datasets and benchmarks for offline safe reinforcement learning. arXiv preprint arXiv:2306.09303."},{"key":"e_1_3_3_1_9_2","volume-title":"In\u00a0International Conference on Machine Learning\u00a0(pp. 9133-9143)","author":"Stooke A.","year":"2020","unstructured":"Stooke, A., Achiam, J., & Abbeel, P. (2020) Responsive safety in reinforcement learning by pid lagrangian methods. In\u00a0International Conference on Machine Learning\u00a0(pp. 9133-9143). PMLR."}],"event":{"name":"ICACS 2025: The 9th International Conference on Algorithms, Computing and Systems","location":"Bangkok Thailand","acronym":"ICACS 2025"},"container-title":["Proceedings of the 9th International Conference on Algorithms, Computing and Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3789418.3789420","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T05:54:25Z","timestamp":1777355665000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3789418.3789420"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,12]]},"references-count":9,"alternative-id":["10.1145\/3789418.3789420","10.1145\/3789418"],"URL":"https:\/\/doi.org\/10.1145\/3789418.3789420","relation":{},"subject":[],"published":{"date-parts":[[2025,12,12]]},"assertion":[{"value":"2026-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}