{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T02:29:37Z","timestamp":1730341777961,"version":"3.28.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,25]],"date-time":"2021-05-25T00:00:00Z","timestamp":1621900800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,25]],"date-time":"2021-05-25T00:00:00Z","timestamp":1621900800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,25]]},"DOI":"10.23919\/acc50511.2021.9483345","type":"proceedings-article","created":{"date-parts":[[2021,7,28]],"date-time":"2021-07-28T20:29:16Z","timestamp":1627504156000},"page":"4958-4963","source":"Crossref","is-referenced-by-count":0,"title":["Batched Learning in Generalized Linear Contextual Bandits with General Decision Sets"],"prefix":"10.23919","author":[{"given":"Zhimei","family":"Ren","sequence":"first","affiliation":[{"name":"Stanford University,Department of Statistics,Stanford,CA,USA,94305"}]},{"given":"Zhengyuan","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Business, New York University,NY,USA,10003"}]},{"given":"Jayant R.","family":"Kalagnanam","sequence":"additional","affiliation":[{"name":"IBM Research,Yorktown Heights,NY,USA,10598"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2012.01.001"},{"journal-title":"Mostly exploration-free algorithms for contextual bandits","year":"2017","author":"bastani","key":"ref32"},{"journal-title":"On worst-case regret of linear thompson sampling","year":"2020","author":"hamidi","key":"ref31"},{"journal-title":"Sequential batch learning in finite-action linear contextual bandits","year":"2020","author":"han","key":"ref30"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v033.i01"},{"key":"ref35","first-page":"2116","article-title":"Combinatorial bandits revisited","author":"combes","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref34","article-title":"Thompson sampling for combinatorial bandits and its application to online feature selection","author":"durand","year":"0","journal-title":"Workshops at the Twenty-Eighth AAAI Conference on Artificial Intelligence"},{"key":"ref10","first-page":"586","article-title":"Parametric bandits: The generalized linear case","author":"filippi","year":"2010","journal-title":"Advances in neural information processing systems"},{"journal-title":"Nonparametric Bandits with Covariates","year":"2010","author":"rigollet","key":"ref11"},{"key":"ref12","first-page":"208","article-title":"Contextual bandits with linear payoff functions","author":"chu","year":"0","journal-title":"Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1287\/11-SSY032"},{"key":"ref14","first-page":"127","article-title":"Thompson sampling for contextual bandits with linear payoffs","author":"agrawal","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref15","first-page":"2442","article-title":"An information-theoretic analysis of thompson sampling","volume":"17","author":"russo","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref16","first-page":"99","article-title":"Scalable generalized linear bandits: Online computation and hashing","author":"jun","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref17","first-page":"2071","article-title":"Provably optimal algorithms for generalized linear contextual bandits","author":"li","year":"0","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70 JMLR org"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1214\/17-EJS1341SI"},{"key":"ref19","first-page":"1097","article-title":"Doubly robust policy evaluation and learning","author":"dud\u00edk","year":"0","journal-title":"Proceedings of the 28th International Conference on Machine Learning"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1214\/15-AOS1381"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2019.1902"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1952-09620-8"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623634"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1561\/2200000024"},{"key":"ref29","first-page":"501","article-title":"Batched multi-armed bandits problem","author":"gao","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1287\/mksc.2016.1023"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1561\/2200000068"},{"key":"ref7","first-page":"28","article-title":"Bandit algorithms","author":"lattimore","year":"2018","journal-title":"Preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772758"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1287\/moor.1100.0446"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.1070.0427"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1002\/sta.411"},{"key":"ref22","first-page":"1731","article-title":"Batch learning from logged bandit feedback through counterfactual risk minimization","volume":"16","author":"swaminathan","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/asu050"},{"key":"ref24","article-title":"Balanced policy evaluation and learning","author":"kallus","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref23","first-page":"1977","article-title":"BISTRO: An efficient relaxation-based method for contextual bandits","author":"rakhlin","year":"0","journal-title":"Proceedings of the International Conference on Machine Learning"},{"key":"ref26","article-title":"Deep learning with logged bandit feedback","author":"joachims","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.3982\/ECTA13288"}],"event":{"name":"2021 American Control Conference (ACC)","start":{"date-parts":[[2021,5,25]]},"location":"New Orleans, LA, USA","end":{"date-parts":[[2021,5,28]]}},"container-title":["2021 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9482409\/9482614\/09483345.pdf?arnumber=9483345","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T23:30:50Z","timestamp":1659483050000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9483345\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,25]]},"references-count":36,"URL":"https:\/\/doi.org\/10.23919\/acc50511.2021.9483345","relation":{},"subject":[],"published":{"date-parts":[[2021,5,25]]}}}