{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:13:39Z","timestamp":1763190819352,"version":"3.45.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11228072","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-10","source":"Crossref","is-referenced-by-count":0,"title":["OCMDP: Observation-Constrained Markov Decision Process"],"prefix":"10.1109","author":[{"given":"Taiyi","family":"Wang","sequence":"first","affiliation":[{"name":"Powersense Technology Limited"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianheng","family":"Liu","sequence":"additional","affiliation":[{"name":"University College London"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bryan","family":"Lee","sequence":"additional","affiliation":[{"name":"Powersense Technology Limited"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhihao","family":"Wu","sequence":"additional","affiliation":[{"name":"University of Edinburgh"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Wu","sequence":"additional","affiliation":[{"name":"University of Cambridge"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Learned graph rewriting with equality saturation: A new paradigm in relational query rewrite and beyond","year":"2024","author":"B\u0103rbulescu","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-71152-7_4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3642970.3655839"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.5220\/0010179904830489"},{"volume-title":"Sequential Learning and Optimization","year":"2010","author":"Haupt","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2241057"},{"article-title":"Learning for robot decision making under distribution shift: A survey","year":"2022","author":"Paudel","key":"ref7"},{"key":"ref8","article-title":"Domain shifts in reinforcement learning: Identifying disturbances in environments","volume-title":"AISafety@ IJCAI","author":"Haider","year":"2021"},{"key":"ref9","first-page":"2633","article-title":"Learning optimal and interpretable policies for healthcare using deep reinforcement learning","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","author":"Chen"},{"key":"ref10","article-title":"Active learning literature survey","volume-title":"University of Wisconsin-Madison, Tech. Rep.","author":"Yu","year":"2009"},{"article-title":"An empirical study of representation learning for reinforcement learning in healthcare","year":"2020","author":"Killian","key":"ref11"},{"article-title":"Challenges for reinforcement learning in healthcare","year":"2021","author":"Riachi","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.surg.2020.11.040"},{"key":"ref14","first-page":"2164","article-title":"Monte-Carlo planning in large POMDPs","author":"Silver","year":"2010","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2021.798659"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_12"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781316471104"},{"key":"ref19","first-page":"451","article-title":"Asac: Active sensing using actor-critic models","volume-title":"Machine Learning for Healthcare Conference","author":"Yoon"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref20"},{"article-title":"Reinforcement learning with efficient active feature acquisition","year":"2020","author":"Yin","key":"ref21"},{"key":"ref22","first-page":"15 650","article-title":"Reinforcement learning with state observation costs in action-contingent noiselessly observable markov decision processes","volume":"34","author":"Nam","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21428\/594757db.72846d04"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21428\/594757db.8e09102d"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-018-0316-z"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1056\/nejmra1814259"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1001\/jama.2016.17216"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1038\/nature21056"},{"issue":"4","key":"ref29","first-page":"277","article-title":"Opportunities in machine learning for healthcare","volume":"6","author":"Komorowski","year":"2018","journal-title":"Big Data"},{"issue":"10","key":"ref30","first-page":"586","article-title":"Reinforcement learning for optimizing sepsis treatment","volume":"2","author":"Nair","year":"2020","journal-title":"Nature Machine Intelligence"},{"article-title":"The medkit-learn(ing) environment: Medical decision modelling through simulation","volume-title":"Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks","author":"Chan","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.12794\/metadc1505267"},{"key":"ref33","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning","author":"Haarnoja"},{"article-title":"Continuous control with deep reinforcement learning","year":"2015","author":"Lillicrap","key":"ref34"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2025,6,30]]},"location":"Rome, Italy","end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11228072.pdf?arnumber=11228072","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:11:36Z","timestamp":1763190696000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11228072\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11228072","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}