{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T08:04:58Z","timestamp":1769846698187,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,4,2]],"date-time":"2020-04-02T00:00:00Z","timestamp":1585785600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,4,2]]},"DOI":"10.1145\/3368555.3384450","type":"proceedings-article","created":{"date-parts":[[2020,3,20]],"date-time":"2020-03-20T20:37:37Z","timestamp":1584736657000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Defining admissible rewards for high-confidence policy evaluation in batch reinforcement learning"],"prefix":"10.1145","author":[{"given":"Niranjani","family":"Prasad","sequence":"first","affiliation":[{"name":"Princeton University, Princeton NJ"}]},{"given":"Barbara","family":"Engelhardt","sequence":"additional","affiliation":[{"name":"Princeton University, Princeton NJ"}]},{"given":"Finale","family":"Doshi-Velez","sequence":"additional","affiliation":[{"name":"Harvard SEAS, Cambridge MA"}]}],"member":"320","published-online":{"date-parts":[[2020,4,2]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"e_1_3_2_1_2_1","volume-title":"Learning Optimal Policies from Observational Data. arXiv preprint arXiv:1802.08679","author":"Atan Onur","year":"2018"},{"key":"e_1_3_2_1_3_1","volume-title":"arXiv preprint arXiv:1606.01540","author":"Brockman Greg","year":"2016"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11755"},{"key":"e_1_3_2_1_5_1","unstructured":"Paul F Christiano Jan Leike Tom Brown Miljan Martic Shane Legg and Dario Amodei. 2017. Deep reinforcement learning from human preferences. In Advances in Neural Information Processing Systems. 4299--4307.  Paul F Christiano Jan Leike Tom Brown Miljan Martic Shane Legg and Dario Amodei. 2017. Deep reinforcement learning from human preferences. In Advances in Neural Information Processing Systems . 4299--4307."},{"key":"e_1_3_2_1_6_1","volume-title":"but How Exactly? A Method for Evaluating Activity Sequences from Data","author":"Doroudi Shayan","year":"2016"},{"key":"e_1_3_2_1_7_1","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","author":"Ernst Damien","year":"2005","journal-title":"Journal of Machine Learning Research 6"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2006.377527"},{"key":"e_1_3_2_1_9_1","unstructured":"Mohammad Ghavamzadeh Marek Petrik and Yinlam Chow. 2016. Safe policy improvement by minimizing robust baseline regret. In Advances in Neural Information Processing Systems. 2298--2306.  Mohammad Ghavamzadeh Marek Petrik and Yinlam Chow. 2016. Safe policy improvement by minimizing robust baseline regret. In Advances in Neural Information Processing Systems . 2298--2306."},{"key":"e_1_3_2_1_10_1","unstructured":"Dylan Hadfield-Menell Smitha Milli Pieter Abbeel Stuart J Russell and Anca Dragan. 2017. Inverse reward design. In Advances in Neural Information Processing Systems. 6765--6774.  Dylan Hadfield-Menell Smitha Milli Pieter Abbeel Stuart J Russell and Anca Dragan. 2017. Inverse reward design. In Advances in Neural Information Processing Systems . 6765--6774."},{"key":"e_1_3_2_1_11_1","unstructured":"Jessie Huang Fa Wu Doina Precup and Yang Cai. 2018. Learning safe policies with expert guidance. In Advances in Neural Information Processing Systems. 9105--9114.  Jessie Huang Fa Wu Doina Precup and Yang Cai. 2018. Learning safe policies with expert guidance. In Advances in Neural Information Processing Systems . 9105--9114."},{"key":"e_1_3_2_1_12_1","volume-title":"Leo Anthony Celi, and Roger G Mark","author":"Johnson Alistair EW","year":"2016"},{"key":"e_1_3_2_1_13_1","volume-title":"Efficient algorithms for on-line optimization. J. Comput. System Sci. 71","author":"Kalai Adam","year":"2016"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"L Kish. 1968. Survey Sampling. John Wiley & Sons Inc. New York London 1965 IX+ 643 S. 31 Abb. 56 Tab. Preis 83 s. Biometrische Zeitschrift 10 1 (1968) 88--89.  L Kish. 1968. Survey Sampling. John Wiley & Sons Inc. New York London 1965 IX+ 643 S. 31 Abb. 56 Tab. Preis 83 s. Biometrische Zeitschrift 10 1 (1968) 88--89.","DOI":"10.1002\/bimj.19680100122"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-018-0213-5"},{"key":"e_1_3_2_1_16_1","volume-title":"Safe Policy Improvement with Baseline Bootstrapping. arXiv preprint arXiv:1712.06924","author":"Laroche Romain","year":"2017"},{"key":"e_1_3_2_1_17_1","volume-title":"Ai safety gridworlds. arXiv preprint arXiv:1711.09883","author":"Leike Jan","year":"2017"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/2893873.2894020"},{"key":"e_1_3_2_1_19_1","volume-title":"Empirical Bernstein bounds and sample variance penalization. arXiv preprint arXiv:0907.3740","author":"Maurer Andreas","year":"2009"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/EMBC.2016.7591355"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the Conference on Uncertainty in Artificial Intelligence. arXiv preprint arXiv:1704","author":"Prasad Niranjani","year":"2017"},{"key":"e_1_3_2_1_22_1","volume-title":"Eligibility Traces for Off-Policy Policy Evaluation. In ICML'00 Proceedings of the Seventeenth International Conference on Machine Learning.","author":"Precup Doina","year":"2000"},{"key":"e_1_3_2_1_23_1","volume-title":"Safe Policy Learning from Observations. arXiv preprint arXiv:1805.07805","author":"Sarafian Elad","year":"2018"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpubh.2018.00168"},{"key":"e_1_3_2_1_25_1","volume-title":"Informing sequential clinical decision-making through reinforcement learning: an empirical study. Machine learning 84, 1-2","author":"Shortreed Susan M","year":"2011"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/3104322.3104450"},{"key":"e_1_3_2_1_27_1","volume-title":"The optimal reward problem: Designing effective reward for bounded agents","author":"Sorg Jonathan Daniel"},{"key":"e_1_3_2_1_28_1","volume-title":"International Conference on Machine Learning. 2380--2388","author":"Thomas Philip","year":"2015"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/2888116.2888134"},{"key":"e_1_3_2_1_30_1","volume-title":"Reinforcement learning in healthcare: a survey. arXiv preprint arXiv:1908.08796","author":"Yu Chao","year":"2019"}],"event":{"name":"ACM CHIL '20: ACM Conference on Health, Inference, and Learning","location":"Toronto Ontario Canada","acronym":"ACM CHIL '20","sponsor":["ACM Association for Computing Machinery"]},"container-title":["Proceedings of the ACM Conference on Health, Inference, and Learning"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3368555.3384450","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3368555.3384450","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:01:26Z","timestamp":1750197686000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3368555.3384450"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,4,2]]},"references-count":30,"alternative-id":["10.1145\/3368555.3384450","10.1145\/3368555"],"URL":"https:\/\/doi.org\/10.1145\/3368555.3384450","relation":{},"subject":[],"published":{"date-parts":[[2020,4,2]]},"assertion":[{"value":"2020-04-02","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}