{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T05:10:16Z","timestamp":1756271416185,"version":"3.44.0"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T00:00:00Z","timestamp":1748217600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["SES-2332054,AST2232455,AST-2132700,CNS-2148128"],"award-info":[{"award-number":["SES-2332054,AST2232455,AST-2132700,CNS-2148128"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,26]]},"DOI":"10.23919\/wiopt66569.2025.11123372","type":"proceedings-article","created":{"date-parts":[[2025,8,26]],"date-time":"2025-08-26T19:04:05Z","timestamp":1756235045000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Fair Dynamic Spectrum Access via Fully Decentralized Multi-Agent Reinforcement Learning"],"prefix":"10.23919","author":[{"given":"Yubo","family":"Zhang","sequence":"first","affiliation":[{"name":"Northwestern University,Department of Electrical and Computer Engineering,USA"}]},{"given":"Pedro","family":"Botelho","sequence":"additional","affiliation":[{"name":"Northwestern University,Department of Electrical and Computer Engineering,USA"}]},{"given":"Trevor","family":"Gordon","sequence":"additional","affiliation":[{"name":"Columbia University,Department of Electrical Engineering,USA"}]},{"given":"Gil","family":"Zussman","sequence":"additional","affiliation":[{"name":"Columbia University,Department of Electrical Engineering,USA"}]},{"given":"Igor","family":"Kadota","sequence":"additional","affiliation":[{"name":"Northwestern University,Department of Electrical and Computer Engineering,USA"}]}],"member":"263","reference":[{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2007.361604"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2006.05.001"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2019.2916583"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2021.3131534"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2022.3233436"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2018.2879433"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2904329"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2021.3126112"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2020.2984227"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2022.3230872"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CCNC49032.2021.9369536"},{"key":"ref13","article-title":"Implicit quantile networks for distributional reinforcement learning","volume-title":"Proc. of ICML","author":"Dabney","year":"2018"},{"key":"ref14","article-title":"Likelihood quantile networks for coordinating multi-agent reinforcement learning","volume-title":"Proc. of AAMAS","author":"Lyu","year":"2020"},{"key":"ref15","article-title":"A quantitative measure of fairness and discrimination for resource allocation in shared systems","volume-title":"Technical Report DEC-TR-301, Tech. Rep.","author":"Jain","year":"1984"},{"key":"ref16","article-title":"Dueling network architectures for deep reinforcement learning","volume-title":"Proc. of ICML","author":"Wang","year":"2016"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.2307\/253675"},{"key":"ref18","article-title":"Unpacking reward shaping: Understanding the benefits of reward engineering on sample complexity","author":"Gupta","year":"2022","journal-title":"arXiv preprint"},{"key":"ref19","article-title":"Inform: Mitigating reward hacking in rlhf via information-theoretic reward modeling","author":"Miao","year":"2024","journal-title":"arXiv preprint"}],"event":{"name":"2025 23rd International Symposium on Modeling and Optimization in Mobile, Ad Hoc, and Wireless Networks (WiOpt)","start":{"date-parts":[[2025,5,26]]},"location":"Linkoping, Sweden","end":{"date-parts":[[2025,5,29]]}},"container-title":["2025 23rd International Symposium on Modeling and Optimization in Mobile, Ad Hoc, and Wireless Networks (WiOpt)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11123177\/11123184\/11123372.pdf?arnumber=11123372","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T04:54:30Z","timestamp":1756270470000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11123372\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,26]]},"references-count":18,"URL":"https:\/\/doi.org\/10.23919\/wiopt66569.2025.11123372","relation":{},"subject":[],"published":{"date-parts":[[2025,5,26]]}}}