{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T09:55:19Z","timestamp":1740131719017,"version":"3.37.3"},"reference-count":38,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2022,8,1]],"date-time":"2022-08-01T00:00:00Z","timestamp":1659312000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,8,1]],"date-time":"2022-08-01T00:00:00Z","timestamp":1659312000000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,8,1]],"date-time":"2022-08-01T00:00:00Z","timestamp":1659312000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,8,1]],"date-time":"2022-08-01T00:00:00Z","timestamp":1659312000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["DMS-2015454"],"award-info":[{"award-number":["DMS-2015454"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["DOD-ONR-N00014-18-1-2640"],"award-info":[{"award-number":["DOD-ONR-N00014-18-1-2640"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"name":"NSF","award":["IIS-1901252","CCF-1909499"],"award-info":[{"award-number":["IIS-1901252","CCF-1909499"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Inform. Theory"],"published-print":{"date-parts":[[2022,8]]},"DOI":"10.1109\/tit.2022.3162335","type":"journal-article","created":{"date-parts":[[2022,3,25]],"date-time":"2022-03-25T19:52:55Z","timestamp":1648237975000},"page":"5314-5339","source":"Crossref","is-referenced-by-count":2,"title":["Minimax Off-Policy Evaluation for Multi-Armed Bandits"],"prefix":"10.1109","volume":"68","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2532-0038","authenticated-orcid":false,"given":"Cong","family":"Ma","sequence":"first","affiliation":[{"name":"Department of Statistics, The University of Chicago, Chicago, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7320-3533","authenticated-orcid":false,"given":"Banghua","family":"Zhu","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering and Computer Sciences, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3766-8031","authenticated-orcid":false,"given":"Jiantao","family":"Jiao","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering and Computer Sciences and the Department of Statistics, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin J.","family":"Wainwright","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering and Computer Sciences and the Department of Statistics, University of California at Berkeley (UC Berkeley), Berkeley, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1017\/9781108571401"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1137\/S0097539701398375"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1007\/978-3-319-51394-2_25"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref4"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.2200\/s00268ed1v01y201005aim009"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1145\/1935826.1935878"},{"key":"ref7","first-page":"1806","article-title":"Personalized ad recommendation systems for life-time value optimization with guarantees","volume-title":"Proc. 24th Int. Joint Conf. Artif. Intell.","author":"Theocharous"},{"key":"ref8","first-page":"5437","article-title":"Off-policy evaluation via off-policy classification","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Irpan"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1111\/1468-0262.00442"},{"key":"ref10","first-page":"608","article-title":"Toward minimax off-policy value estimation","volume-title":"Proc. 8th Int. Conf. Artif. Intell. Statist.","volume":"38","author":"Li"},{"key":"ref11","first-page":"3589","article-title":"Optimal and adaptive off-policy evaluation in contextual bandits","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1080\/01621459.1952.10483446"},{"key":"ref13","article-title":"Hyperparameter selection for offline reinforcement learning","author":"Le Paine","year":"2020","journal-title":"arXiv:2007.09055"},{"key":"ref14","first-page":"2139","article-title":"Data-efficient off-policy policy evaluation for reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Thomas"},{"key":"ref15","first-page":"652","article-title":"Doubly robust off-policy value evaluation for reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jiang"},{"key":"ref16","first-page":"3948","article-title":"Asymptotically efficient off-policy evaluation for tabular reinforcement learning","volume-title":"Proc. 23rd Int. Conf. Artif. Intell. Statist.","author":"Yin"},{"key":"ref17","first-page":"2701","article-title":"Minimax-optimal off-policy evaluation with linear function approximation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Duan"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1137\/1131054"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1007\/s004409970006"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1214\/10-AOS849"},{"key":"ref21","first-page":"179","article-title":"A CLT and tight lower bounds for estimating entropy","volume-title":"Proc. Electron. Colloq. Comput. Complex.","volume":"17","author":"Valiant"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/TIT.2016.2548468"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1109\/TIT.2015.2412945"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1109\/JSAIT.2020.3041036"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1145\/1993636.1993727"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1145\/3125643"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1214\/17-AOS1665"},{"doi-asserted-by":"publisher","key":"ref28","DOI":"10.1214\/07-STS227B"},{"issue":"1","key":"ref29","first-page":"3207","article-title":"Counterfactual reasoning and learning systems: The example of computational advertising","volume":"14","author":"Bottou","year":"2013","journal-title":"J. Mach. Learn. Res."},{"key":"ref30","first-page":"305","article-title":"Truncated importance sampling for reinforcement learning with experience replay","volume-title":"Proc. Int. Multiconf. Comput. Sci. Inf. Technol.","author":"Wawrzynski"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1198\/106186008X320456"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1017\/9781108627771"},{"key":"ref33","volume":"1442","author":"Fiat","year":"1998","journal-title":"Online Algorithms: The State of the Art"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1016\/c2013-0-05307-8"},{"doi-asserted-by":"publisher","key":"ref35","DOI":"10.1145\/2827872"},{"volume-title":"Minimax Theory","year":"2008","author":"Lafferty","key":"ref36"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1515\/9783110850826"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1109\/TIT.2015.2478816"}],"container-title":["IEEE Transactions on Information Theory"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/18\/9829223\/9741776-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/18\/9829223\/09741776.pdf?arnumber=9741776","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:44:53Z","timestamp":1705538693000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9741776\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8]]},"references-count":38,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tit.2022.3162335","relation":{},"ISSN":["0018-9448","1557-9654"],"issn-type":[{"type":"print","value":"0018-9448"},{"type":"electronic","value":"1557-9654"}],"subject":[],"published":{"date-parts":[[2022,8]]}}}