{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:07:44Z","timestamp":1775228864812,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T00:00:00Z","timestamp":1697846400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022ZD0116402"],"award-info":[{"award-number":["2022ZD0116402"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U22B2057"],"award-info":[{"award-number":["U22B2057"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,21]]},"DOI":"10.1145\/3583780.3615043","type":"proceedings-article","created":{"date-parts":[[2023,10,21]],"date-time":"2023-10-21T07:45:26Z","timestamp":1697874326000},"page":"885-894","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":67,"title":["Safe-NORA: Safe Reinforcement Learning-based Mobile Network Resource Allocation for Diverse User Demands"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0454-7516","authenticated-orcid":false,"given":"Wenzhen","family":"Huang","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4343-703X","authenticated-orcid":false,"given":"Tong","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8902-7968","authenticated-orcid":false,"given":"Yuting","family":"Cao","sequence":"additional","affiliation":[{"name":"China Mobile Research Institute, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0174-6053","authenticated-orcid":false,"given":"Zhe","family":"Lyu","sequence":"additional","affiliation":[{"name":"China Mobile Research Institute, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2042-8157","authenticated-orcid":false,"given":"Yanping","family":"Liang","sequence":"additional","affiliation":[{"name":"China Mobile Research Institute, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2152-0234","authenticated-orcid":false,"given":"Li","family":"Yu","sequence":"additional","affiliation":[{"name":"China Mobile Research Institute, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0419-5514","authenticated-orcid":false,"given":"Depeng","family":"Jin","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5795-808X","authenticated-orcid":false,"given":"Junge","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5617-1659","authenticated-orcid":false,"given":"Yong","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International conference on machine learning. PMLR, 22--31","author":"Achiam Joshua","year":"2017","unstructured":"Joshua Achiam, David Held, Aviv Tamar, and Pieter Abbeel. 2017. Constrained policy optimization. In International conference on machine learning. PMLR, 22--31."},{"key":"e_1_3_2_1_2_1","volume-title":"Constrained Markov decision processes: stochastic modeling","author":"Altman Eitan","unstructured":"Eitan Altman. 1999. Constrained Markov decision processes: stochastic modeling. Routledge."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/3122009.3242024"},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 3304--3312","author":"Ding Dongsheng","year":"2021","unstructured":"Dongsheng Ding, Xiaohan Wei, Zhuoran Yang, Zhaoran Wang, and Mihailo Jovanovic. 2021. Provably efficient safe exploration via primal-dual policy optimization. In International Conference on Artificial Intelligence and Statistics. PMLR, 3304--3312."},{"key":"e_1_3_2_1_5_1","first-page":"8378","article-title":"Natural policy gradient primal-dual method for constrained markov decision processes","volume":"33","author":"Ding Dongsheng","year":"2020","unstructured":"Dongsheng Ding, Kaiqing Zhang, Tamer Basar, and Mihailo Jovanovic. 2020. Natural policy gradient primal-dual method for constrained markov decision processes. Advances in Neural Information Processing Systems, Vol. 33 (2020), 8378--8390.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/GLOBECOM38437.2019.9014032"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886795"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581791.3597297"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2933762"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599853"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2021.3138915"},{"key":"e_1_3_2_1_12_1","volume-title":"Resource Allocation and Task Scheduling in Fog Computing and Internet of Everything Environments: A Taxonomy, Review, and Future Directions. ACM Computing Surveys (CSUR)","author":"Jamil Bushra","year":"2022","unstructured":"Bushra Jamil, Humaira Ijaz, Mohammad Shojafar, Kashif Munir, and Rajkumar Buyya. 2022. Resource Allocation and Task Scheduling in Fog Computing and Internet of Everything Environments: A Taxonomy, Review, and Future Directions. ACM Computing Surveys (CSUR) (2022)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3378430"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCOM.110.2100042"},{"key":"e_1_3_2_1_15_1","first-page":"5138","article-title":"Matrix encoding networks for neural combinatorial optimization","volume":"34","author":"Kwon Yeong-Dae","year":"2021","unstructured":"Yeong-Dae Kwon, Jinho Choo, Iljoo Yoon, Minah Park, Duwon Park, and Youngjune Gwon. 2021. Matrix encoding networks for neural combinatorial optimization. In Advances in Neural Information Processing Systems, Vol. 34. 5138--5149.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2020.2984364"},{"key":"e_1_3_2_1_17_1","article-title":"Performance Analysis of Co-and Cross-tier Device-to-Device Communication Underlaying Macro-small Cell Wireless Networks","volume":"10","author":"Li Tong","year":"2016","unstructured":"Tong Li, Zhu Xiao, Hassana Maigary Georges, Zhinian Luo, and Dong Wang. 2016. Performance Analysis of Co-and Cross-tier Device-to-Device Communication Underlaying Macro-small Cell Wireless Networks. KSII Transactions on Internet & Information Systems, Vol. 10, 4 (2016).","journal-title":"KSII Transactions on Internet & Information Systems"},{"key":"e_1_3_2_1_18_1","volume-title":"Carbon emissions and sustainability of launching 5G mobile networks in China. arXiv preprint arXiv:2306.08337","author":"Li Tong","year":"2023","unstructured":"Tong Li, Li Yu, Yibo Ma, Tong Duan, Wenzhen Huang, Yan Zhou, Depeng Jin, Yong Li, and Tao Jiang. 2023. Carbon emissions and sustainability of launching 5G mobile networks in China. arXiv preprint arXiv:2306.08337 (2023)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5932"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599888"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2916583"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICC.2019.8761431"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski et al. 2015. Human-level control through deep reinforcement learning. nature Vol. 518 7540 (2015) 529--533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_24_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Paternain Santiago","year":"2019","unstructured":"Santiago Paternain, Luiz Chamon, Miguel Calvo-Fullana, and Alejandro Ribeiro. 2019. Constrained reinforcement learning has zero duality gap. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_25_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_26_1","unstructured":"Li Shen Long Yang Shixiang Chen Bo Yuan Xueqian Wang Dacheng Tao et al. 2022. Penalized Proximal Policy Optimization for Safe Reinforcement Learning. arXiv preprint arXiv:2205.11814 (2022)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCCN.2018.2881442"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2021.3061981"},{"key":"e_1_3_2_1_29_1","volume-title":"International Conference on Machine Learning. PMLR, 9133--9143","author":"Stooke Adam","year":"2020","unstructured":"Adam Stooke, Joshua Achiam, and Pieter Abbeel. 2020. Responsive safety in reinforcement learning by pid lagrangian methods. In International Conference on Machine Learning. PMLR, 9133--9143."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2705720"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2021.3073009"},{"key":"e_1_3_2_1_32_1","volume-title":"Reward constrained policy optimization. arXiv preprint arXiv:1805.11074","author":"Tessler Chen","year":"2018","unstructured":"Chen Tessler, Daniel J Mankowitz, and Shie Mannor. 2018. Reward constrained policy optimization. arXiv preprint arXiv:1805.11074 (2018)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2021.3053771"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2022.3158270"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1002\/wcm.2662"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.3390\/s16111854"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/PIMRC.2016.7794900"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/MWC.001.1900184"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/PIMRC50174.2021.9569358"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.2018.1800109"}],"event":{"name":"CIKM '23: The 32nd ACM International Conference on Information and Knowledge Management","location":"Birmingham United Kingdom","acronym":"CIKM '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 32nd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583780.3615043","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3583780.3615043","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:56Z","timestamp":1750178216000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3583780.3615043"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,21]]},"references-count":40,"alternative-id":["10.1145\/3583780.3615043","10.1145\/3583780"],"URL":"https:\/\/doi.org\/10.1145\/3583780.3615043","relation":{},"subject":[],"published":{"date-parts":[[2023,10,21]]},"assertion":[{"value":"2023-10-21","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}