{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T22:54:34Z","timestamp":1778799274690,"version":"3.51.4"},"reference-count":13,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["CNS 2304863,CNS 2339774,ONR N00014-23-1-2505"],"award-info":[{"award-number":["CNS 2304863,CNS 2339774,ONR N00014-23-1-2505"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.23919\/acc60939.2024.10644503","type":"proceedings-article","created":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T17:56:19Z","timestamp":1725558979000},"page":"522-527","source":"Crossref","is-referenced-by-count":1,"title":["Counterfactually-Guided Causal Reinforcement Learning with Reward Machines"],"prefix":"10.23919","author":[{"given":"Nasim","family":"Baharisangari","sequence":"first","affiliation":[{"name":"School for Engineering of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85287"}]},{"given":"Yash","family":"Paliwal","sequence":"additional","affiliation":[{"name":"School for Engineering of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85287"}]},{"given":"Zhe","family":"Xu","sequence":"additional","affiliation":[{"name":"School for Engineering of Matter, Transport and Energy, Arizona State University,Tempe,AZ,85287"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1156","article-title":"Counterfactual data-fusion for online reinforcement learners","volume-title":"Proceedings of the 34th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"70","author":"Forney","year":"2017"},{"key":"ref2","article-title":"Counterfactual data augmentation using locally factored dynamics","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems, ser. NIPS20, Red Hook, NY, USA","author":"Pitis"},{"key":"ref3","article-title":"AdaRL: What, where, and how to adapt in transfer reinforcement learning","volume":"abs\/2107.02729","author":"Huang","year":"2021","journal-title":"ArXiv"},{"key":"ref4","article-title":"Learning causal state representations of partially observable environments","volume":"abs\/1906.10437","author":"Zhang","year":"2019","journal-title":"ArXiv"},{"key":"ref5","volume-title":"Woulda, coulda, shoulda: Counterfactually-guided policy search","author":"Buesing","year":"2018"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v30i1.6756"},{"key":"ref7","first-page":"2112","article-title":"Using reward machines for high-level task specification and decomposition in reinforcement learning","volume-title":"ICML, ser. Proceedings of Machine Learning Research","volume":"80","author":"Icarte","year":"2018"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"103989","DOI":"10.1016\/j.artint.2023.103989","article-title":"Learning reward machines: A study in partially observable reinforcement learning","volume":"323","author":"Icarte","year":"2023","journal-title":"Artificial Intelligence"},{"key":"ref9","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume-title":"Proceedings of the Sixteenth International Conference on Machine Learning, ser. ICML 99","author":"Ng","year":"1999"},{"key":"ref10","first-page":"433","article-title":"Dynamic potential-based reward shaping","volume-title":"Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems - Volume 1, ser. AAMAS 12","author":"Devlin","year":"2012"},{"key":"ref11","article-title":"Learning reward machines for partially observable reinforcement learning","volume":"32","author":"Toro Icarte","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref12","volume-title":"Playing atari with deep reinforcement learning","author":"Mnih","year":"2013"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"}],"event":{"name":"2024 American Control Conference (ACC)","location":"Toronto, ON, Canada","start":{"date-parts":[[2024,7,10]]},"end":{"date-parts":[[2024,7,12]]}},"container-title":["2024 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10644130\/10644150\/10644503.pdf?arnumber=10644503","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T05:37:47Z","timestamp":1725687467000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10644503\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":13,"URL":"https:\/\/doi.org\/10.23919\/acc60939.2024.10644503","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]}}}