{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T16:56:26Z","timestamp":1762102586270,"version":"3.37.3"},"reference-count":36,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000893","name":"Math + X award from the Simons Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000893","id-type":"DOI","asserted-by":"publisher"}]},{"name":"JHU","award":["2003514594"],"award-info":[{"award-number":["2003514594"]}]},{"DOI":"10.13039\/100000183","name":"ARO","doi-asserted-by":"publisher","award":["W911NF-17-1-0304"],"award-info":[{"award-number":["W911NF-17-1-0304"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["DMS 1712800"],"award-info":[{"award-number":["DMS 1712800"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Discovery Innovation Fund for Biomedical Data Sciences"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Control Syst. Lett."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/lcsys.2020.3047601","type":"journal-article","created":{"date-parts":[[2020,12,28]],"date-time":"2020-12-28T20:48:29Z","timestamp":1609188509000},"page":"37-42","source":"Crossref","is-referenced-by-count":4,"title":["Batched Learning in Generalized Linear Contextual Bandits With General Decision Sets"],"prefix":"10.1109","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2872-5842","authenticated-orcid":false,"given":"Zhimei","family":"Ren","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0005-9411","authenticated-orcid":false,"given":"Zhengyuan","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Jayant R.","family":"Kalagnanam","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2012.01.001"},{"journal-title":"Mostly exploration-free algorithms for contextual bandits","year":"2017","author":"bastani","key":"ref32"},{"journal-title":"On worst-case regret of linear thompson sampling","year":"2020","author":"hamidi","key":"ref31"},{"journal-title":"Sequential batch learning in finite-action linear contextual bandits","year":"2020","author":"han","key":"ref30"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v033.i01"},{"key":"ref35","first-page":"2116","article-title":"Combinatorial bandits revisited","author":"combes","year":"2015","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref34","first-page":"6","article-title":"Thompson sampling for combinatorial bandits and its application to online feature selection","author":"durand","year":"2014","journal-title":"Proc Workshops 28th AAAI Conf Artif Intell"},{"key":"ref10","first-page":"586","article-title":"Parametric bandits: The generalized linear case","author":"filippi","year":"2010","journal-title":"Proc Adv Neural Inf Process Syst"},{"journal-title":"Nonparametric Bandits with Covariates","year":"2010","author":"rigollet","key":"ref11"},{"key":"ref12","first-page":"208","article-title":"Contextual bandits with linear payoff functions","author":"chu","year":"2011","journal-title":"Proc 14th Int Conf Artif Intell Stat"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1287\/11-SSY032"},{"key":"ref14","first-page":"127","article-title":"Thompson sampling for contextual bandits with linear payoffs","author":"agrawal","year":"2013","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref15","first-page":"2442","article-title":"An information-theoretic analysis of Thompson sampling","volume":"17","author":"russo","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref16","first-page":"99","article-title":"Scalable generalized linear bandits: Online computation and hashing","author":"jun","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref17","first-page":"2071","article-title":"Provably optimal algorithms for generalized linear contextual bandits","volume":"70","author":"li","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1214\/17-EJS1341SI"},{"key":"ref19","first-page":"1097","article-title":"Doubly robust policy evaluation and learning","author":"dud\u00edk","year":"2011","journal-title":"Proc 28th Int Conf Mach Learn"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1214\/15-AOS1381"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2019.1902"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1952-09620-8"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623634"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1561\/2200000024"},{"key":"ref29","first-page":"501","article-title":"Batched multi-armed bandits problem","author":"gao","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1287\/mksc.2016.1023"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1561\/2200000068"},{"key":"ref7","first-page":"28","author":"lattimore","year":"2018","journal-title":"Bandit Algorithms"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1100.0446"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.1070.0427"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1002\/sta.411"},{"key":"ref22","first-page":"1731","article-title":"Batch learning from logged bandit feedback through counterfactual risk minimization","volume":"16","author":"swaminathan","year":"2015","journal-title":"J Mach Learn Res"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/asu050"},{"key":"ref24","first-page":"8909","article-title":"Balanced policy evaluation and learning","author":"kallus","year":"2018","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref23","first-page":"1977","article-title":"BISTRO: An efficient relaxation-based method for contextual bandits","author":"rakhlin","year":"2016","journal-title":"Proc Int Conf Machine Learning"},{"key":"ref26","first-page":"1","article-title":"Deep learning with logged bandit feedback","author":"joachims","year":"2018","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.3982\/ECTA13288"}],"container-title":["IEEE Control Systems Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7782633\/9462165\/09309392.pdf?arnumber=9309392","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,8]],"date-time":"2022-04-08T18:01:43Z","timestamp":1649440903000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9309392\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/lcsys.2020.3047601","relation":{},"ISSN":["2475-1456"],"issn-type":[{"type":"electronic","value":"2475-1456"}],"subject":[],"published":{"date-parts":[[2022]]}}}