{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T05:58:47Z","timestamp":1775627927899,"version":"3.50.1"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T00:00:00Z","timestamp":1764720000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,3]]},"DOI":"10.1109\/icmla66185.2025.00158","type":"proceedings-article","created":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T19:54:58Z","timestamp":1775591698000},"page":"1040-1045","source":"Crossref","is-referenced-by-count":0,"title":["Strategy Masking: A Method for Guardrails in Value-based Reinforcement Learning Agents"],"prefix":"10.1109","author":[{"given":"Jonathan","family":"Keane","sequence":"first","affiliation":[{"name":"Milwaukee School of Engineering,Diercks School of Advanced Computing,Milwaukee,WI,USA"}]},{"given":"Sam","family":"Keyser","sequence":"additional","affiliation":[{"name":"Milwaukee School of Engineering,Diercks School of Advanced Computing,Milwaukee,WI,USA"}]},{"given":"Jeremy","family":"Kedziora","sequence":"additional","affiliation":[{"name":"Milwaukee School of Engineering,Diercks School of Advanced Computing,Milwaukee,WI,USA"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Reinforcement Learning: An Introduction.","author":"Sutton","year":"2018"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-022-09552-y"},{"issue":"4","key":"ref3","first-page":"1102","article-title":"Multi-agent reinforcement learning for adaptive scheduling: application to multi-site company","volume-title":"IFAC Proceedings Volumes","volume":"42","author":"Aissani"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.4018\/jats.2009040104"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1555301.1555311"},{"key":"ref6","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v39i18.34068","article-title":"Pareto set learning for multi-objective reinforcement learning","author":"Liu","year":"2025"},{"key":"ref7","article-title":"Task decomposition in reinforcement learning","volume-title":"1994 AAAI Spring Symposium","author":"Karlsson"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1613\/jair.639"},{"key":"ref9","article-title":"Hierarchical and interpretable skill acquisition in multi-task reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Shu"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50045-3"},{"key":"ref11","article-title":"Hybrid reward architecture for reinforcement learning","volume":"abs\/1706.04208","author":"van Seijen","year":"2017","journal-title":"CoRR"},{"key":"ref12","article-title":"Q-decomposition for reinforcement learning agents","volume-title":"International Conference on Machine Learning","author":"Russell"},{"key":"ref13","first-page":"2107","article-title":"Using reward machines for high-level task specification and decomposition in reinforcement learning","volume-title":"Proceedings of the 35th International Conference on Machine Learning","volume":"80","author":"Icarte"},{"key":"ref14","author":"Juozapaitis","year":"2019","journal-title":"Explainable reinforcement learning via reward decomposition"},{"key":"ref15","article-title":"Never give up: Learning directed exploration strategies","author":"Badia","year":"2020"},{"issue":"2","key":"ref16","article-title":"Learning state-specific action masks for reinforcement learning","volume-title":"Algorithms","volume":"17","author":"Wang","year":"2024"},{"key":"ref17","article-title":"Playing atari with deep reinforcement learning","volume-title":"CoRR","volume":"abs\/1312.5602","author":"Mnih","year":"2013"},{"key":"ref18","article-title":"Deep recurrent q-learning for partially observable mdps","volume-title":"CoRR","volume":"abs\/1507.06527","author":"Hausknecht","year":"2015"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref21","article-title":"Toward a theory of reinforcement learning connectionist systems","volume-title":"Northeastern University, College of Computer Science, Technical Report NU-CCS-88-3","author":"Williams","year":"1988"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022672621406"},{"key":"ref23","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"}],"event":{"name":"2025 International Conference on Machine Learning and Applications (ICMLA)","location":"Boca Raton, FL, USA","start":{"date-parts":[[2025,12,3]]},"end":{"date-parts":[[2025,12,5]]}},"container-title":["2025 International Conference on Machine Learning and Applications (ICMLA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11471302\/11471304\/11471409.pdf?arnumber=11471409","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T05:19:16Z","timestamp":1775625556000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11471409\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,3]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/icmla66185.2025.00158","relation":{},"subject":[],"published":{"date-parts":[[2025,12,3]]}}}