{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T22:10:32Z","timestamp":1772489432539,"version":"3.50.1"},"reference-count":24,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS 2304863"],"award-info":[{"award-number":["CNS 2304863"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS 2339774"],"award-info":[{"award-number":["CNS 2339774"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS 2332476"],"award-info":[{"award-number":["IIS 2332476"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-23-1-2505"],"award-info":[{"award-number":["N00014-23-1-2505"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Control Syst. Lett."],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/lcsys.2026.3651656","type":"journal-article","created":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T18:37:53Z","timestamp":1767724673000},"page":"3149-3154","source":"Crossref","is-referenced-by-count":0,"title":["Encoding High-Level Knowledge in Offline Multi-Agent Reinforcement Learning Using Reward Machines"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-4301-1430","authenticated-orcid":false,"given":"Shayan","family":"Meshkat Alsadat","sequence":"first","affiliation":[{"name":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0440-0912","authenticated-orcid":false,"given":"Zhe","family":"Xu","sequence":"additional","affiliation":[{"name":"School for Engineering of Matter, Transport and Energy, Arizona State University, Tempe, AZ, USA"}]}],"member":"263","reference":[{"key":"ref1","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Proc. 35th Adv. Neural Inf. Process. Syst.","author":"Fujimoto"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1115\/DETC2025-169759"},{"key":"ref3","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kumar"},{"key":"ref4","first-page":"104","article-title":"An optimistic perspective on offline reinforcement learning","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Agarwal"},{"key":"ref5","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Fujimoto"},{"key":"ref6","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2019","journal-title":"arXiv:1911.11361"},{"key":"ref7","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020","journal-title":"arXiv:2005.01643"},{"key":"ref8","first-page":"52413","article-title":"Offline multi-agent reinforcement learning with implicit global-to-local value regularization","volume-title":"Proc. 37th Adv. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref9","first-page":"77290","article-title":"Counterfactual conservative Q learning for offline multi-agent reinforcement learning","volume-title":"Proc. 37th Adv. Neural Inf. Process. Syst.","author":"Shao"},{"key":"ref10","article-title":"Learning non-Markovian reward models in MDPs","author":"Rens","year":"2020","journal-title":"arXiv:2001.09293"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.65109\/fzbc2933"},{"key":"ref12","first-page":"2107","article-title":"Using reward machines for high-level task specification and decomposition in reinforcement learning","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Icarte"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCR65461.2025.11072658"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12440"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17096"},{"issue":"1","key":"ref16","first-page":"927","article-title":"An overview of association rule mining algorithms","volume":"5","author":"Kumbhare","year":"2014","journal-title":"Int. J. Comput. Sci. Inf. Technol."},{"key":"ref17","volume-title":"Learning Automata: An Introduction","author":"Narendra","year":"2012"},{"key":"ref18","first-page":"18","article-title":"Association rule mining: A survey","volume":"135","author":"Zhao","year":"2003","journal-title":"Nanyang Technol. Univ."},{"issue":"1","key":"ref19","first-page":"1","article-title":"Data quality considerations for big data and machine learning: Going beyond data cleaning and transformations","volume":"10","author":"Gudivada","year":"2017","journal-title":"Int. J. Adv. Softw."},{"key":"ref20","article-title":"AWAC: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2021","journal-title":"arXiv:2006.09359"},{"key":"ref21","first-page":"487","article-title":"Fast algorithms for mining association rules","volume-title":"Proc. 20th VLDB Conf.","author":"Agrawal"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-41591-8_2"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v30i1.6756"},{"key":"ref24","article-title":"Offline reinforcement learning with implicit Q-learning","author":"Kostrikov","year":"2021","journal-title":"arXiv:2110.06169"}],"container-title":["IEEE Control Systems Letters"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/7782633\/10939047\/11329130-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7782633\/10939047\/11329130.pdf?arnumber=11329130","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T20:57:52Z","timestamp":1772485072000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11329130\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/lcsys.2026.3651656","relation":{},"ISSN":["2475-1456"],"issn-type":[{"value":"2475-1456","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]}}}