{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T13:55:07Z","timestamp":1773842107448,"version":"3.50.1"},"reference-count":48,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["82073338"],"award-info":[{"award-number":["82073338"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Sichuan Provincial Science and Technology Department","award":["2022YFS0384"],"award-info":[{"award-number":["2022YFS0384"]}]},{"name":"Sichuan Provincial Science and Technology Department","award":["2022YFQ0108"],"award-info":[{"award-number":["2022YFQ0108"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Biomed. Health Inform."],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1109\/jbhi.2023.3321099","type":"journal-article","created":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T18:00:50Z","timestamp":1696269650000},"page":"459-469","source":"Crossref","is-referenced-by-count":15,"title":["Towards Real-World Applications of Personalized Anesthesia Using Policy Constraint Q Learning for Propofol Infusion Control"],"prefix":"10.1109","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2038-7164","authenticated-orcid":false,"given":"Xiuding","family":"Cai","sequence":"first","affiliation":[{"name":"Chengdu Institute of Computer Application, Chinese Academy of Sciences, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6089-6738","authenticated-orcid":false,"given":"Jiao","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Anesthesiology, West China Hospital, Sichuan University &amp; The Research Units of West China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4943-608X","authenticated-orcid":false,"given":"Yaoyao","family":"Zhu","sequence":"additional","affiliation":[{"name":"Chengdu Institute of Computer Application, Chinese Academy of Sciences, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3763-1055","authenticated-orcid":false,"given":"Beimin","family":"Wang","sequence":"additional","affiliation":[{"name":"Chengdu Institute of Computer Application, Chinese Academy of Sciences, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4752-0102","authenticated-orcid":false,"given":"Yu","family":"Yao","sequence":"additional","affiliation":[{"name":"Chengdu Institute of Computer Application, Chinese Academy of Sciences, Chengdu, 
China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/S0140-6736(08)60878-8"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1213\/ANE.0000000000002258"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1097\/ALN.0000000000003722"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1213\/ANE.0000000000000769"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1213\/ANE.0000000000001372"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1213\/ANE.0000000000004646"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3037725"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1213\/ANE.0000000000001394"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-021-01599-w"},{"key":"ref10","first-page":"147","article-title":"Continuous state-space models for optimal sepsis treatment: A deep reinforcement learning approach","volume-title":"Proc. Mach. Learn. Healthcare Conf.","author":"Raghu","year":"2017"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2020.105443"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2020.3014556"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2023.3249571"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1213\/ANE.0b013e318202cb7c"},{"key":"ref15","first-page":"2174","article-title":"Multi-task time series analysis applied to drug response modelling","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Bird","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICOIN50884.2021.9333933"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i13.26862"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2023.104376"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2022.3183854"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20627-6_11"},{"key":"ref21","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2019"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref23","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kumar","year":"2020"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.anclin.2021.03.012"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TBME.2008.915670"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1097\/00000542-200604000-00012"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/NER.2013.6696208"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2021.102227"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3190379"},{"key":"ref30","article-title":"AWAC: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2020"},{"key":"ref31","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"issue":"315","key":"ref32","first-page":"1","article-title":"d3rlpy: An offline deep reinforcement learning library","volume-title":"J. Mach. Learn. Res.","volume":"23","author":"Seno","year":"2022"},{"key":"ref33","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. 3rd Int. Conf. Learn. 
Representations","author":"Kingma","year":"2015"},{"key":"ref34","article-title":"Soft actor-critic algorithms and applications","author":"Haarnoja","year":"2018"},{"key":"ref35","first-page":"3703","article-title":"Batch policy learning under constraints","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Le","year":"2019"},{"key":"ref36","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Fujimoto","year":"2021"},{"key":"ref37","first-page":"1711","article-title":"Mildly conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lyu","year":"2022"},{"key":"ref38","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2018"},{"key":"ref39","article-title":"Empirical study of off-policy policy evaluation for reinforcement learning","volume-title":"Proc. Neural Inf. Process. Syst. Track Datasets Benchmarks","volume":"1","author":"Voloshin","year":"2021"},{"key":"ref40","first-page":"2","article-title":"Model selection for offline reinforcement learning: Practical considerations for healthcare settings","volume-title":"Proc. Mach. Learn. Healthcare Conf.","author":"Tang","year":"2021"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2023.3303367"},{"key":"ref42","article-title":"Off-policy evaluation via off-policy classification","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Irpan","year":"2019"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.3321\/j.issn:0529-6579.2007.z1.029"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1038\/s41551-018-0304-0"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3027314"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.cose.2021.102355"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2022.3181823"},{"key":"ref48","article-title":"Federated offline reinforcement learning","author":"Zhou","year":"2022"}],"container-title":["IEEE Journal of Biomedical and Health Informatics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221020\/10380963\/10268595.pdf?arnumber=10268595","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T04:27:49Z","timestamp":1705033669000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10268595\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1]]},"references-count":48,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/jbhi.2023.3321099","relation":{},"ISSN":["2168-2194","2168-2208"],"issn-type":[{"value":"2168-2194","type":"print"},{"value":"2168-2208","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1]]}}}