{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,24]],"date-time":"2025-10-24T08:30:41Z","timestamp":1761294641235,"version":"3.37.3"},"reference-count":116,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1109\/tpami.2023.3305381","type":"journal-article","created":{"date-parts":[[2023,8,18]],"date-time":"2023-08-18T17:18:41Z","timestamp":1692379121000},"page":"15308-15327","source":"Crossref","is-referenced-by-count":2,"title":["PAC-Bayes Bounds for Bandit Problems: A Survey and Experimental Comparison"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4089-9087","authenticated-orcid":false,"given":"Hamish","family":"Flynn","sequence":"first","affiliation":[{"name":"Bosch Center for Artificial Intelligence, Renningen, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9446-2315","authenticated-orcid":false,"given":"David","family":"Reeb","sequence":"additional","affiliation":[{"name":"Bosch Center for Artificial Intelligence, Renningen, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6293-3656","authenticated-orcid":false,"given":"Melih","family":"Kandemir","sequence":"additional","affiliation":[{"name":"Department of Mathematics and Computer Science (IMADA), University of Southern Denmark, Odense, Denmark"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5266-8091","authenticated-orcid":false,"given":"Jan","family":"Peters","sequence":"additional","affiliation":[{"name":"Intelligent Autonomous Systems Group, Technische Universit&#x00E4;t Darmstadt, Darmstadt, Germany"}]}],"member":"263","reference":[{"key":"ref57","article-title":"A tutorial on PAC-Bayesian theory","author":"laviolette","year":"2017","journal-title":"Proc Talk NIPS Workshop (Almost) 50 Shades PAC-Bayesian Learn PAC-Bayesian Trends Insights"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1017\/9781108571401"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-16108-7_13"},{"key":"ref58","article-title":"Dichotomize and generalize: PAC-Bayesian binary activated deep neural networks","author":"letarte","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref53","first-page":"809","article-title":"(Not) bounding the true error","author":"langford","year":"2002","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref52","first-page":"640","article-title":"Confident off-policy evaluation and selection through self-normalized importance weighting","author":"kuzborskij","year":"2021","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref55","first-page":"817","article-title":"The Epoch-Greedy algorithm for multi-armed bandits with side information","author":"langford","year":"2007","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref54","first-page":"439","article-title":"PAC-Bayes & margins","author":"langford","year":"2002","journal-title":"Proc Int Conf Neural Inf Process"},{"article-title":"Efron-stein PAC-Bayesian inequalities","year":"2019","author":"kuzborskij","key":"ref51"},{"key":"ref50","first-page":"2575","article-title":"Variational dropout and the local reparameterization trick","author":"kingma","year":"2015","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.1.1"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.2307\/1269620"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1198\/106186008X320456"},{"key":"ref42","first-page":"25725","article-title":"Online PAC-Bayesian learning","author":"haddouche","year":"2022","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref41","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref44","first-page":"206","article-title":"A PAC-Bayesian margin bound for linear classifiers: Why SVMs work","author":"herbrich","year":"2000","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref43","article-title":"PAC-Bayes generalisation bounds for heavy-tailed losses through supermartingales","author":"haddouche","year":"2023","journal-title":"Trans Mach Learn Res"},{"key":"ref49","first-page":"1531","article-title":"A natural policy gradient","author":"kakade","year":"2001","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref8","first-page":"397","article-title":"Using confidence bounds for exploitation-exploration trade-offs","volume":"3","author":"auer","year":"2002","journal-title":"J Mach Learn Res"},{"key":"ref7","first-page":"1","article-title":"Minimax policies for adversarial and stochastic bandits","author":"audibert","year":"2009","journal-title":"Proc 22nd Annu Conf Learn Theory"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref4","first-page":"205","article-title":"Meta-learning by adjusting priors based on extended PAC-Bayes theory","author":"amit","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7503.003.0007"},{"journal-title":"Neural Network Learning Theoretical Foundations","year":"2009","author":"anthony","key":"ref6"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1023\/A:1020281327116"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1145\/267460.267466"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1561\/2200000068"},{"key":"ref40","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"2017","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref35","first-page":"4093","article-title":"How tight can PAC-Bayes be in the small data regime?","author":"foong","year":"2021","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/s10618-022-00825-4"},{"key":"ref37","first-page":"729","article-title":"Sparsity regret bounds for individual sequences in online linear regression","volume":"14","author":"gerchinovitz","year":"2013","journal-title":"J Mach Learn Res"},{"key":"ref36","first-page":"3199","article-title":"Beyond UCB: Optimal and efficient contextual bandits with regression oracles","author":"foster","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref31","article-title":"Computing nonvacuous generalization bounds for deep (stochastic) neural networks with many more parameters than training data","author":"dziugaite","year":"2017","journal-title":"Proc Conf Uncertainty Artif Intell"},{"key":"ref30","first-page":"67","article-title":"Contextual bandits for adapting treatment in a mouse model of de novo carcinogenesis","author":"durand","year":"2018","journal-title":"Proc Mach Learn Healthcare Conf"},{"key":"ref33","first-page":"604","article-title":"On the role of data in PAC-Bayes","author":"dziugaite","year":"2021","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref32","first-page":"8440","article-title":"Data-dependent PAC-Bayes priors via differential privacy","author":"dziugaite","year":"2018","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst"},{"key":"ref39","article-title":"A primer on PAC-Bayesian learning","author":"guedj","year":"2019","journal-title":"Proc 2nd Congr Fr Math Soc"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553419"},{"article-title":"A unified recipe for deriving (time-uniform) PAC-Bayes bounds","year":"2023","author":"chugg","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511546921"},{"key":"ref26","first-page":"355","article-title":"Stochastic linear optimization under bandit feedback","author":"dani","year":"2008","journal-title":"Proc Annu Conf Learn Theory"},{"key":"ref25","article-title":"Fast and accurate deep network learning by exponential linear units (ELUs)","author":"clevert","year":"2016","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref20","article-title":"A PAC-Bayesian approach to adaptive classification","author":"catoni","year":"2003","journal-title":"Preprint"},{"journal-title":"Pac-Bayesian Supervised Classification The Thermodynamics of Statistical Learning","year":"2007","author":"catoni","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/b99352"},{"key":"ref28","article-title":"UCI machine learning repository","author":"dua","year":"2019","journal-title":"Schl Inf Comput Sci"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1002\/cpa.3160280102"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1214\/14-STS500"},{"article-title":"PAC-Bayes iterated logarithm bounds for martingale mixtures","year":"2015","author":"balsubramani","key":"ref13"},{"key":"ref12","first-page":"1019","article-title":"Covariant policy search","author":"bagnell","year":"2003","journal-title":"Proc Int Joint Conf Artif Intell"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2019.1902"},{"key":"ref14","first-page":"1515","article-title":"On Markov chain Monte Carlo methods for tall data","volume":"18","author":"bardenet","year":"2017","journal-title":"J Mach Learn Res"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2012.2211334"},{"key":"ref96","first-page":"1683","article-title":"PAC-Bayesian analysis of contextual bandits","author":"seldin","year":"2011","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.2748\/tmj\/1178243286"},{"key":"ref99","first-page":"103","article-title":"Evaluation and analysis of the performance of the EXP3 algorithm in stochastic environments","author":"seldin","year":"2013","journal-title":"Proc of European Workshop on Reinforcement Learning"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1137\/S0097539701398375"},{"key":"ref98","first-page":"98","article-title":"PAC-Bayes-Bernstein inequality for martingales and its application to multiarmed bandits","author":"seldin","year":"2012","journal-title":"Proc Workshop On-line Trading Exploration Exploitation"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.2017.1285773"},{"key":"ref16","first-page":"19","article-title":"Contextual bandit algorithms with supervised learning guarantees","author":"beygelzimer","year":"2011","journal-title":"Proc 14th Int Conf Artif Intell Statist"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1561\/2200000024"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/130385.130401"},{"key":"ref93","first-page":"233","article-title":"PAC-Bayesian generalisation error bounds for Gaussian process classification","volume":"3","author":"seeger","year":"2002","journal-title":"J Mach Learn Res"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref92"},{"article-title":"PAC-Bayesian analysis of martingales and multiarmed bandits","year":"2011","author":"seldin","key":"ref95"},{"key":"ref94","first-page":"3595","article-title":"PAC-Bayesian analysis of co-clustering and beyond","volume":"11","author":"seldin","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref91","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref90","first-page":"280","article-title":"Meta-learning reliable priors in the function space","author":"rothfuss","year":"2021","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref89","first-page":"9116","article-title":"PACOH: Bayes-optimal meta-learning with PAC-guarantees","author":"rothfuss","year":"2021","journal-title":"Proc Int Conf Mach Learn"},{"article-title":"PAC-Bayes with backprop","year":"2019","author":"rivasplata","key":"ref86"},{"key":"ref85","first-page":"9234","article-title":"PAC-Bayes bounds for stable algorithms with instance-dependent priors","author":"rivasplata","year":"2018","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1952-09620-8"},{"key":"ref87","article-title":"PAC-Bayes analysis beyond the usual bounds","author":"rivasplata","year":"2020","journal-title":"Proc Int Conf Neural Inf Process"},{"journal-title":"Information and Information Stability of Random Variables and Processes","year":"1964","author":"pinsker","key":"ref82"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v24i1.7727"},{"key":"ref84","first-page":"3341","article-title":"Learning Gaussian processes by minimizing PAC-Bayesian generalization bounds","author":"reeb","year":"2018","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst"},{"key":"ref83","first-page":"11702","article-title":"Bridging offline reinforcement learning and imitation learning: A tale of pessimism","author":"rashidinejad","year":"2021","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref80","first-page":"1","article-title":"Reinforcement learning for humanoid robotics","author":"peters","year":"2003","journal-title":"Proc 3rd IEEE-RAS Int Conf Humanoid Robots"},{"key":"ref79","article-title":"Progress in self-certified neural networks","author":"p\u00e9rez-ortiz","year":"2021","journal-title":"Proc Int Conf Neural Inf Process Syst Workshop Bayesian Deep Learn"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1145\/1968.1972"},{"article-title":"Learning PAC-Bayes priors for probabilistic neural networks","year":"2021","author":"p\u00e9rez-ortiz","key":"ref78"},{"article-title":"PAC-Bayes mini-tutorial: A continuous union bound","year":"2014","author":"van erven","key":"ref109"},{"key":"ref106","first-page":"109","article-title":"PAC-Bayes-empirical-Bernstein inequality","author":"tolstikhin","year":"2013","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref107","first-page":"9636","article-title":"Normalized flat minima: Exploring scale invariant definition of flat minima for neural networks using PAC-Bayesian analysis","author":"tsuzuku","year":"2020","journal-title":"Proc Int Conf Mach Learn"},{"journal-title":"Self-normalized processes limit theory and statistical applications","year":"2009","author":"de la pe\u00f1a","key":"ref75"},{"key":"ref104","first-page":"466","article-title":"A strongly quasiconvex PAC-Bayesian bound","author":"thiemann","year":"2017","journal-title":"Proc Int Conf Algorithmic Learn Theory"},{"key":"ref74","first-page":"3507","article-title":"PAC-Bayes bounds with data dependent priors","volume":"13","author":"parrado-hern\u00e1ndez","year":"2012","journal-title":"J Mach Learn Res"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.2307\/2332286"},{"key":"ref77","first-page":"1","article-title":"Tighter risk certificates for neural networks","volume":"22","author":"p\u00e9rez-ortiz","year":"2021","journal-title":"J Mach Learn Res"},{"key":"ref102","first-page":"1731","article-title":"Batch learning from logged bandit feedback through counterfactual risk minimization","volume":"16","author":"swaminathan","year":"2015","journal-title":"J Mach Learn Res"},{"key":"ref76","first-page":"991","article-title":"A PAC-Bayesian bound for lifelong learning","author":"pentina","year":"2014","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2021.3119605"},{"article-title":"User-friendly introduction to PAC-Bayes bounds","year":"2021","author":"alquier","key":"ref2"},{"key":"ref1","first-page":"1638","article-title":"Taming the monster: A fast and simple algorithm for contextual bandits","author":"agarwal","year":"2014","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1287\/mksc.2018.1129"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1561\/2200000001"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.3390\/e23101257"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1186\/s13660-015-0768-3"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2016.06.019"},{"key":"ref72","article-title":"Low-rank bandit methods for high-dimensional dynamic pricing","author":"mueller","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1145\/2641190.2641198"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-45167-9_16"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/279943.279989"},{"article-title":"A PAC-Bayesian tutorial with a dropout bound","year":"2013","author":"mcallester","key":"ref69"},{"key":"ref64","first-page":"4125","article-title":"Bayesian counterfactual risk minimization","author":"london","year":"2019","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref115","first-page":"10802","article-title":"Fast-rate PAC-Bayes generalization bounds via shifted Rademacher processes","author":"yang","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.09.018"},{"journal-title":"Modeling Purposeful Adaptive Behavior with the Principle of Maximum Causal Entropy","year":"2010","author":"ziebart","key":"ref116"},{"article-title":"A note on the PAC Bayesian theorem","year":"2004","author":"maurer","key":"ref66"},{"key":"ref113","article-title":"PAC-Bayesian analysis of counterfactual risk in stochastic contextual bandits","author":"wang","year":"2019","journal-title":"Proc Multi-Disciplinary Conf Reinforcement Learn Decis Mak"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-27926-8_29"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.2307\/3214163"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2012.10.013"},{"journal-title":"Monte Carlo Strategies in Scientific Computing","year":"2001","author":"liu","key":"ref62"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10200"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10308548\/10224540.pdf?arnumber=10224540","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,11]],"date-time":"2023-12-11T19:59:56Z","timestamp":1702324796000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10224540\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":116,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3305381","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2023,12]]}}}