{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T04:37:46Z","timestamp":1773376666460,"version":"3.50.1"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,25]],"date-time":"2023-06-25T00:00:00Z","timestamp":1687651200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,25]],"date-time":"2023-06-25T00:00:00Z","timestamp":1687651200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,25]]},"DOI":"10.1109\/isit54713.2023.10206792","type":"proceedings-article","created":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T13:31:15Z","timestamp":1692711075000},"page":"1306-1311","source":"Crossref","is-referenced-by-count":1,"title":["Thompson Sampling Regret Bounds for Contextual Bandits with sub-Gaussian rewards"],"prefix":"10.1109","author":[{"given":"Amaury","family":"Gouverneur","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Information Science and Engineering (ISE)"}]},{"given":"Borja","family":"Rodr\u00edguez-G\u00e1lvez","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Information Science and Engineering (ISE)"}]},{"given":"Tobias J.","family":"Oechtering","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Information Science and Engineering (ISE)"}]},{"given":"Mikael","family":"Skoglund","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology,Division of Information Science and Engineering (ISE)"}]}],"member":"263","reference":[{"key":"ref13","first-page":"19","article-title":"Contextual bandit learning with predictable rewards","author":"agarwal","year":"2012","journal-title":"Artificial Intelligence and Statistics"},{"key":"ref12","first-page":"208","article-title":"Contextual bandits with linear payoff functions","author":"chu","year":"2011","journal-title":"Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref15","first-page":"18 907","article-title":"Efficient first-order contextual bandits: Prediction, allocation, and triangular discrimination","volume":"34","author":"foster","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref14","first-page":"3199","article-title":"Beyond ucb: Optimal and efficient contextual bandits with regression oracles","author":"foster","year":"2020","journal-title":"International Conference on Machine Learning"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-7970-4"},{"key":"ref11","article-title":"Efficient optimal learning for contextual bandits","author":"dudik","year":"2011"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s00453-003-1038-1"},{"key":"ref2","article-title":"The epoch-greedy algorithm for multi-armed bandits with side information","volume":"20","author":"langford","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref1","article-title":"Lifting the information ratio: An information-theoretic analysis of thompson sampling for contextual bandits","author":"neu","year":"2022"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/25.3-4.285"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1137\/21M140924X"},{"key":"ref19","article-title":"An empirical evaluation of Thompson sampling","volume":"24","author":"chapelle","year":"2011","journal-title":"Advances in neural information processing systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1002\/asmb.874"},{"key":"ref24","article-title":"Low-rank bandit methods for high-dimensional dynamic pricing","volume":"32","author":"mueller","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/Allerton49937.2022.9929353"},{"key":"ref26","article-title":"Information-theoretic generalization bounds for sgld via data-dependent estimates","volume":"32","author":"negrea","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref25","first-page":"19","article-title":"Contextual bandit algorithms with supervised learning guarantees","author":"beygelzimer","year":"2011","journal-title":"Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref20","article-title":"Learning to optimize via information-directed sampling","volume":"27","author":"russo","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref22","article-title":"An information-theoretic analysis for thompson sampling with many actions","volume":"31","author":"dong","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref21","first-page":"2442","article-title":"An information-theoretic analysis of Thompson sampling","volume":"17","author":"russo","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref28","first-page":"169","article-title":"The safe bayesian: learning the learning rate via the mixability gap","author":"gr\u00fcnwald","year":"2012","journal-title":"Algorithmic Learning Theory 23rd International Conference ALT 2012"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21236\/ADA623999"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-0865-5_26"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143956"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF00993348"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CEC48606.2020.9185782"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1979.10481033"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1176348382"},{"key":"ref6","author":"gullapalli","year":"1990","journal-title":"Associate reinforcement learning of real-valued functions"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1985.6313371"}],"event":{"name":"2023 IEEE International Symposium on Information Theory (ISIT)","location":"Taipei, Taiwan","start":{"date-parts":[[2023,6,25]]},"end":{"date-parts":[[2023,6,30]]}},"container-title":["2023 IEEE International Symposium on Information Theory (ISIT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10206429\/10206441\/10206792.pdf?arnumber=10206792","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:27:28Z","timestamp":1773347248000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10206792\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,25]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/isit54713.2023.10206792","relation":{},"subject":[],"published":{"date-parts":[[2023,6,25]]}}}