{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T07:22:41Z","timestamp":1774941761534,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,3,1]],"date-time":"2022-03-01T00:00:00Z","timestamp":1646092800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,3,1]],"date-time":"2022-03-01T00:00:00Z","timestamp":1646092800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Data Min Knowl Disc"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s10618-022-00825-4","type":"journal-article","created":{"date-parts":[[2022,3,16]],"date-time":"2022-03-16T05:02:53Z","timestamp":1647406973000},"page":"841-876","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["PAC-Bayesian lifelong learning for multi-armed bandits"],"prefix":"10.1007","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4089-9087","authenticated-orcid":false,"given":"Hamish","family":"Flynn","sequence":"first","affiliation":[]},{"given":"David","family":"Reeb","sequence":"additional","affiliation":[]},{"given":"Melih","family":"Kandemir","sequence":"additional","affiliation":[]},{"given":"Jan","family":"Peters","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,3,16]]},"reference":[{"key":"825_CR1","unstructured":"Amit R, Mier R (2018) Meta-learning by adjusting priors based on extended PAC-Bayes theory. Int Conf Mach Learn 205\u2013214"},{"key":"825_CR2","unstructured":"Azar MG, Lazaric A, Brunskill E (2013) Sequential transfer in multi-armed bandit with finite set of models. Adv Neural Inf Process Syst"},{"key":"825_CR3","first-page":"357","volume":"19","author":"K Azuma","year":"1967","unstructured":"Azuma K (1967) Weighted sums of certain dependent random variables. Tohoku Math J Second Ser 19:357\u2013367","journal-title":"Tohoku Math J Second Ser"},{"key":"825_CR4","unstructured":"Balsubramani A (2015) PAC-Bayes iterated logarithm bounds for martingale mixtures. arXiv preprint. arXiv:1506.06573"},{"key":"825_CR5","doi-asserted-by":"crossref","unstructured":"Banerjee A (2006) On Bayesian bounds. In: Proceedings of the 23rd international conference on machine learning, pp 81\u201388","DOI":"10.1145\/1143844.1143855"},{"key":"825_CR6","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1613\/jair.731","volume":"12","author":"J Baxter","year":"2000","unstructured":"Baxter J (2000) A model of inductive bias learning. J Artif Intell Res 12:149\u2013198","journal-title":"J Artif Intell Res"},{"key":"825_CR7","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/s10994-009-5152-4","volume":"79","author":"S Ben-David","year":"2010","unstructured":"Ben-David S, Blitzer J, Crammer K, Kulesza A, Pereira F, Vaughan JW (2010) A theory of learning from different domains. Mach Learn 79:151\u2013175","journal-title":"Mach Learn"},{"key":"825_CR8","unstructured":"Brunskill E, Li L (2013) Sample complexity of multi-task reinforcement learning. In: Proceedings of the twenty-ninth conference on uncertainty in artificial intelligence"},{"key":"825_CR9","unstructured":"Burda Y, Grosse R, Salakhutdinov R (2016) Importance weighted autoencoders. Int Conf Learn Represen"},{"key":"825_CR10","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1007379606734","volume":"28","author":"R Caruana","year":"1997","unstructured":"Caruana R (1997) Multitask learning. Mach Learn 28:41\u201375","journal-title":"Mach Learn"},{"key":"825_CR11","unstructured":"Catoni O (2004) Statistical learning theory and stochastic optimization: Ecole d\u2019Et\u00e9 de Probabilit\u00e9s de Saint-Flour, XXXI-2001. Springer Science & Business Media"},{"key":"825_CR12","doi-asserted-by":"crossref","unstructured":"Cesa-Bianchi N, Lugosi G (2006) Prediction, learning and games. Cambridge University Press","DOI":"10.1017\/CBO9780511546921"},{"key":"825_CR13","unstructured":"D\u2019Eramo C, Tateo D, Bonarini A, Restelli M, Peters J (2020) Sharing knowledge in multi-task deep reinforcement learning. Int Conf Learn Represen (ICLR)"},{"key":"825_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1002\/cpa.3160280102","volume":"28","author":"MD Donsker","year":"1975","unstructured":"Donsker MD, Varadhan SRS (1975) Asymptotic evaluation of certain Markov process expectations for large time I. Commun Pure Appl Math 28:1\u201347","journal-title":"Commun Pure Appl Math"},{"key":"825_CR15","doi-asserted-by":"crossref","unstructured":"Dud\u00edk M, Erhan D, Langford J, Li L (2014) Doubly robust policy evaluation and optimization. Stat Sci","DOI":"10.1214\/14-STS500"},{"key":"825_CR16","first-page":"1624","volume":"23","author":"M Fard","year":"2010","unstructured":"Fard M, Pineau J (2010) PAC-Bayesian model selection for reinforcement learning. Adv Neural Inf Process Syst 23:1624\u20131632","journal-title":"Adv Neural Inf Process Syst"},{"key":"825_CR17","unstructured":"Fard M, Pineau J, Szepesv\u00e1ri C (2011) PAC-Bayesian policy evaluation for reinforcement learning. In: Proceedings of the twenty-seventh conference on uncertainty in artificial intelligence, pp 195\u2013202"},{"key":"825_CR18","unstructured":"Farid A, Veer S, Majumdar A (2021) Task-driven out-of-distribution detection with statistical guarantees for robot learning. Proc Conf Robot Learn"},{"key":"825_CR19","unstructured":"Germain P, Habrard A, Laviolette F, Morvant E (2013) A PAC-Bayesian approach for domain adaptation with specialization to linear classifiers. Int Conf Mach Learn 736\u2013748"},{"key":"825_CR20","unstructured":"Germain P, Habrard A, Laviolette F, Morvant E (2016) A new PAC-Bayesian perspective on domain adaptation. Int Conf Mach Learn 859\u2013868"},{"key":"825_CR21","doi-asserted-by":"crossref","unstructured":"Germain P, Habrard A, Laviolette F, Morvant. E (2020) PAC-Bayes and domain adaptation. Neurocomputing 379\u2013397","DOI":"10.1016\/j.neucom.2019.10.105"},{"key":"825_CR22","unstructured":"Guedj B, Shawe-Taylor J (2019) A primer on PAC-Bayesian learning. In: Proceedings of the second congress of the French mathematical society"},{"key":"825_CR23","doi-asserted-by":"crossref","unstructured":"Hoeffding W (1994) Probability inequalities for sums of bounded random variables. The Collected Works of Wassily Hoeffding, pp 409\u2013426","DOI":"10.1007\/978-1-4612-0865-5_26"},{"key":"825_CR24","unstructured":"Kingma DP Welling M (2013) Auto-encoding variational Bayes. arXiv preprint. arXiv:1312.6114"},{"key":"825_CR25","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint. arXiv:1412.6980"},{"key":"825_CR26","unstructured":"Kuzborskij I, Szepesv\u00e1ri C (2019) Efron-Stein PAC-Bayesian inequalities. arXiv preprint. arXiv:1909.01931"},{"key":"825_CR27","unstructured":"Lazaric A, Restelli M (2011) Transfer from multiple MDPs. Adv Neural Inf Process Syst 1746\u20131754"},{"key":"825_CR28","doi-asserted-by":"crossref","unstructured":"Li C, Chen C, Carlson D, Carin L (2016) Preconditioned stochastic gradient langevin dynamics for deep neural networks. In: Proceedings of the AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v30i1.10200"},{"key":"825_CR29","unstructured":"London B, Sandler T (2019) Bayesian counterfactual risk minimization. Int Conf Mach Learn"},{"key":"825_CR30","unstructured":"Majumdar A, Goldstein M (2018) PAC-Bayes Control: synthesizing controllers that provably generalize to novel environments. Conf Robot Learn 293\u2013305"},{"key":"825_CR31","doi-asserted-by":"crossref","unstructured":"McAllester DA (1998) Some PAC-Bayesian theorems. In: Proceedings of the international conference on computational learning theory (COLT)","DOI":"10.1145\/279943.279989"},{"key":"825_CR32","unstructured":"Pentina A, Lampert C (2014) A PAC-Bayesian bound for lifelong learning. Int Conf Mach Learn 991\u2013999"},{"key":"825_CR33","unstructured":"Pentina A, Lampert C (2015) Lifelong learning with non-iid tasks. Adv Neural Inf Process Syst 1540\u20131548"},{"key":"825_CR34","unstructured":"Ren A, Veer S, Majumdar A (2020) Generalization guarantees for imitation learning. Proc Conf Robot Learn"},{"key":"825_CR35","unstructured":"Rothfuss J, Fortuin V, Krause A (2020) PACOH: Bayes-optimal meta-learning with PAC-guarantees, arXiv preprint. arXiv:2002.05551"},{"key":"825_CR36","doi-asserted-by":"crossref","unstructured":"Rubinstein RY (1981) Simulation and the Monte Carlo method, Wiley","DOI":"10.1002\/9780470316511"},{"key":"825_CR37","unstructured":"Schmidhuber J (1987) Evolutionary principles in self-referential learning, Technische Universit\u00e4t M\u00fcnchen"},{"key":"825_CR38","doi-asserted-by":"crossref","unstructured":"Seldin Y, Laviolette F, Shawe-Taylor J, Peters J, Auer P (2011) PAC-Bayesian analysis of martingales and multiarmed bandits. arXiv preprint. arXiv:1105.2416","DOI":"10.1109\/TIT.2012.2211334"},{"key":"825_CR39","first-page":"1683","volume":"24","author":"Y Seldin","year":"2011","unstructured":"Seldin Y, Auer P, Shawe-taylor J, Ortner R, Laviolette F (2011) PAC-Bayesian Analysis of Contextual Bandits. Adv Neural Inf Process Syst 24:1683\u20131691","journal-title":"Adv Neural Inf Process Syst"},{"key":"825_CR40","doi-asserted-by":"crossref","unstructured":"Seldin Y, Laviolette F, Cesa-Bianchi N, Shawe-Taylor J, Auer P (2012) PAC-Bayesian inequalities for martingales. IEEE Trans Inf Theory","DOI":"10.1109\/TIT.2012.2211334"},{"key":"825_CR41","first-page":"98","volume":"26","author":"Y Seldin","year":"2012","unstructured":"Seldin Y, Cesa-Bianchi N, Auer P, Laviolette F, Shawe-Taylor J (2012) PAC-Bayes-Bernstein inequality for martingales and its application to multiarmed bandits. JMLR Workshop Conf Proc 26:98\u2013111","journal-title":"JMLR Workshop Conf Proc"},{"key":"825_CR42","doi-asserted-by":"crossref","unstructured":"Shawe-Taylor J, Williamson RC (1997) A PAC analysis of a Bayesian estimator. In: Proceedings of the tenth annual conference on Computational learning theory, pp 2\u20139","DOI":"10.1145\/267460.267466"},{"key":"825_CR43","unstructured":"Soare M, Alsharif O, Lazaric A, Pineau J (2014) Multi-task linear bandits. In: NIPS2014 workshop on transfer and multi-task learning: theory meets practice"},{"key":"825_CR44","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/0921-8890(95)00004-Y","volume":"15","author":"S Thrun","year":"1995","unstructured":"Thrun S, Mitchell TM (1995) Lifelong robot learning. Robot Auton Syst 15:25\u201346","journal-title":"Robot Auton Syst"},{"key":"825_CR45","unstructured":"Veer S, Majumdar A (2020) Probably approximately correct vision-based planning using motion primitives. Proc Conf Robot Learn"},{"key":"825_CR46","doi-asserted-by":"crossref","unstructured":"Wang Z, Shen L, Miao Y, Chen S, Xu W (2015) PAC-Bayesian inequalities of some random variables sequences. J Inequ Appl 1\u20138","DOI":"10.1186\/s13660-015-0768-3"}],"container-title":["Data Mining and Knowledge Discovery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-022-00825-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10618-022-00825-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10618-022-00825-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,29]],"date-time":"2023-01-29T07:12:42Z","timestamp":1674976362000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10618-022-00825-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3]]},"references-count":46,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["825"],"URL":"https:\/\/doi.org\/10.1007\/s10618-022-00825-4","relation":{},"ISSN":["1384-5810","1573-756X"],"issn-type":[{"value":"1384-5810","type":"print"},{"value":"1573-756X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,3]]},"assertion":[{"value":"5 May 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 February 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 March 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}