{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T17:23:44Z","timestamp":1779384224052,"version":"3.53.1"},"reference-count":56,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"9","license":[{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,9,1]],"date-time":"2022-09-01T00:00:00Z","timestamp":1661990400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100005937","name":"Cheng Hsin General Hospital Foundation","doi-asserted-by":"publisher","award":["CHGH111-(N)03"],"award-info":[{"award-number":["CHGH111-(N)03"]}],"id":[{"id":"10.13039\/501100005937","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"publisher","award":["MOST110-2221-E-305-012"],"award-info":[{"award-number":["MOST110-2221-E-305-012"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Biomed. Health Inform."],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1109\/jbhi.2022.3183854","type":"journal-article","created":{"date-parts":[[2022,6,17]],"date-time":"2022-06-17T19:25:45Z","timestamp":1655493945000},"page":"4763-4772","source":"Crossref","is-referenced-by-count":24,"title":["Supervised Optimal Chemotherapy Regimen Based on Offline Reinforcement Learning"],"prefix":"10.1109","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1608-8196","authenticated-orcid":false,"given":"Chamani","family":"Shiranthika","sequence":"first","affiliation":[{"name":"Department of Electrical Engineering, National Taipei University, New Taipei City, Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kuo-Wei","family":"Chen","sequence":"additional","affiliation":[{"name":"Section of Hematology and Oncology, Department of Internal Medicine, Cheng Hsin General Hospital, Taipei, Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chung-Yih","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Radiation Oncology, Cheng Hsin General Hospital, Taipei, Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5329-6368","authenticated-orcid":false,"given":"Chan-Yun","family":"Yang","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, National Taipei University, New Taipei City, Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"B. H.","family":"Sudantha","sequence":"additional","affiliation":[{"name":"Department of Information Technology, University of Moratuwa, Katubedda, Moratuwa, Sri Lanka"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wei-Fu","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, National Taipei University, New Taipei City, Taiwan"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-3618-5"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICHI.2017.45"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TETC.2019.2896325"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.2196\/18477"},{"key":"ref5","volume-title":"Adaptive Treatment Strategies in Practice: Planning Trials and Analyzing Data For Personalized Medicine","author":"Kosorok","year":"2016"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-021-97028-6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.5705\/ss.2012.364"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1214\/13-STS450"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219961"},{"key":"ref10","article-title":"Optimizing patient treatment recommendations using reinforcement learning combined with recurrent neural network patient state simulation","author":"Mei","year":"2019"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.2000127"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1186\/s12911-020-1120-5"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/S0895-7177(03)00133-X"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.jtbi.2005.06.037"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICSSE.2010.5551776"},{"key":"ref16","first-page":"161","article-title":"Reinforcement learning with action-derived rewards for chemotherapy and clinical trial dosing regimen selection","volume-title":"Proc. 3rd Mach. Learn. Healthcare Conf.","author":"Yauney"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.4172\/jcsb.1000173"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.biosystems.2011.07.005"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.cmpb.2019.03.004"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.mbs.2017.08.004"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.jtbi.2008.07.002"},{"key":"ref22","article-title":"Replicating and revising current literature on reinforcement learning for strategic chemotherapy dosages","author":"Beck","year":"2019"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/BF00114730"},{"key":"ref24","first-page":"1437","article-title":"A comprehensive survey on safe reinforce-ment learning","volume":"16","author":"Garca","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2007.14"},{"key":"ref26","article-title":"Reinforcement learning with supervision from noisy demonstrations","author":"Ning","year":"2020"},{"key":"ref27","first-page":"139","article-title":"Adaptive supervisor: Method of reinforcement learning fault elimination by application of supervised learning","volume-title":"Proc. Federated Conf. Comput. Sci. Inf. Syst.","author":"Krzyszto"},{"key":"ref28","article-title":"Loss is its own reward: Self-supervision for reinforcement learning","author":"Shelhamer","year":"2017"},{"key":"ref29","first-page":"4159","article-title":"Reinforcement learning with supervision by combining multiple learnings and expert advices","volume-title":"Proc. Amer. Control Conf.","author":"Chang"},{"key":"ref30","first-page":"2859","article-title":"Learning from limited demonstrations","volume-title":"Adv. Neural Inf. Process. Syst.","author":"Kim","year":"2013"},{"key":"ref31","first-page":"907","article-title":"S4RL: Surprisingly simple self-supervision for offline reinforcement learning","volume-title":"Proc. 5th Conf. Robot Learn.","author":"Sinha","year":"2022"},{"key":"ref32","article-title":"An analytics approach to designing clinical trials for cancer","author":"Relyea","year":"2013"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1093\/aje\/kwx027"},{"key":"ref34","first-page":"11012","article-title":"Designing optimal dynamic treatment regimens: A causal reinforcement learning approach","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Zhang"},{"key":"ref35","first-page":"13423","article-title":"Near-optimal reinforcement learning in dynamic treatment regimens","volume-title":"Adv. Neural Inf. Process. Syst.","author":"Zhang","year":"2019"},{"issue":"203","key":"ref36","first-page":"1","article-title":"Dynamic control of stochastic evolution: A deep reinforcement learning approach to adaptively targeting emergent drug resistance","volume":"21","author":"Engelhardt","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1002\/sim.3720"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.2200\/s00268ed1v01y201005aim009"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-4095-0"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3_2"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2020.XVI.076"},{"key":"ref42","article-title":"Offline reinforcement learning","author":"Kumar","year":"2020"},{"key":"ref43","article-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems","author":"Levine","year":"2020"},{"key":"ref44","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"Ernst","year":"2005","journal-title":"J. Mach. Learn. Res."},{"key":"ref45","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2019"},{"key":"ref46","first-page":"11784","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume":"32","author":"Kumar","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref47","first-page":"104","article-title":"An optimistic perspective on offline reinforcement learning","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Agarwal","year":"2020"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196935"},{"key":"ref49","article-title":"AWAC: Accelerating online reinforcement learning with offline datasets","author":"Nair","year":"2021"},{"key":"ref50","article-title":"Behavior regularized offline reinforcement learning","author":"Wu","year":"2019"},{"key":"ref51","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1038\/laban.254"},{"key":"ref53","article-title":"Evaluating reinforcement learning algorithms in observational health settings","author":"Gottesman","year":"2018"},{"key":"ref54","first-page":"14129","article-title":"MOPO: Model-based offline policy optimization","volume":"33","author":"Yu","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref55","article-title":"Hyperparameter selection for offline reinforcement learning","author":"Paine","year":"2020"},{"key":"ref56","article-title":"d3rlpy - A data-driven deep reinforcement learning library as an out-of-the-box tool","author":"Seno","year":"2020"}],"container-title":["IEEE Journal of Biomedical and Health Informatics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221020\/9882959\/09798842.pdf?arnumber=9798842","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T04:27:34Z","timestamp":1706761654000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9798842\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9]]},"references-count":56,"journal-issue":{"issue":"9"},"URL":"https:\/\/doi.org\/10.1109\/jbhi.2022.3183854","relation":{},"ISSN":["2168-2194","2168-2208"],"issn-type":[{"value":"2168-2194","type":"print"},{"value":"2168-2208","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,9]]}}}