{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T05:57:42Z","timestamp":1771653462247,"version":"3.50.1"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"22","license":[{"start":{"date-parts":[[2023,9,2]],"date-time":"2023-09-02T00:00:00Z","timestamp":1693612800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,2]],"date-time":"2023-09-02T00:00:00Z","timestamp":1693612800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62206151"],"award-info":[{"award-number":["62206151"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012152","name":"National Postdoctoral Program for Innovative Talents","doi-asserted-by":"publisher","award":["BX20220167"],"award-info":[{"award-number":["BX20220167"]}],"id":[{"id":"10.13039\/501100012152","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012152","name":"National Postdoctoral Program for Innovative Talents","doi-asserted-by":"publisher","award":["62072355"],"award-info":[{"award-number":["62072355"]}],"id":[{"id":"10.13039\/501100012152","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100015401","name":"Key Research and Development Projects of Shaanxi Province","doi-asserted-by":"publisher","award":["2022KWZ-10"],"award-info":[{"award-number":["2022KWZ-10"]}],"id":[{"id":"10.13039\/501100015401","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,11]]},"DOI":"10.1007\/s10489-023-04911-y","type":"journal-article","created":{"date-parts":[[2023,9,2]],"date-time":"2023-09-02T01:02:07Z","timestamp":1693616527000},"page":"27128-27147","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["PAC-Bayesian offline Meta-reinforcement learning"],"prefix":"10.1007","volume":"53","author":[{"given":"Zheng","family":"Sun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenheng","family":"Jing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3181-6881","authenticated-orcid":false,"given":"Shangqi","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lingling","family":"An","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,2]]},"reference":[{"key":"4911_CR1","unstructured":"Amit R, Meir R (2018) Meta-learning by adjusting priors based on extended pac-bayes theory. In: International Conference on Machine Learning, PMLR, pp 205\u2013214"},{"key":"4911_CR2","doi-asserted-by":"crossref","unstructured":"Arriba-P\u00e9rez F, Garc\u00eda-M\u00e9ndez S, Gonz\u00e1lez-Casta\u00f1o FJ, et\u00a0al (2022) Automatic detection of cognitive impairment in elderly people using an entertainment chatbot with natural language processing capabilities. J Ambient Intell Human Comput pp 1\u201316","DOI":"10.1007\/s12652-022-03849-2"},{"issue":"2","key":"4911_CR3","doi-asserted-by":"publisher","first-page":"1471","DOI":"10.1109\/LRA.2021.3057046","volume":"6","author":"S Belkhale","year":"2021","unstructured":"Belkhale S, Li R, Kahn G et al (2021) Model-based meta-reinforcement learning for flight with suspended payloads. IEEE Robot Autom Lett 6(2):1471\u20131478","journal-title":"IEEE Robot Autom Lett"},{"key":"4911_CR4","unstructured":"Brockman G, Cheung V, Pettersson L, et\u00a0al (2016) Openai gym. arXiv e-prints pp arXiv\u20131606"},{"key":"4911_CR5","first-page":"3","volume":"1050","author":"O Catoni","year":"2007","unstructured":"Catoni O (2007) Pac-bayesian supervised classification: the thermodynamics of statistical learning. Stat 1050:3","journal-title":"Stat"},{"issue":"3","key":"4911_CR6","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1016\/j.irbm.2020.05.002","volume":"42","author":"FR Dhanaseelan","year":"2021","unstructured":"Dhanaseelan FR, Sutha MJ (2021) Detection of breast cancer based on fuzzy frequent itemsets mining. Irbm 42(3):198\u2013206","journal-title":"Irbm"},{"key":"4911_CR7","unstructured":"Duan Y, Schulman J, Chen X, et\u00a0al (2016) Rl$$^{2}$$: Fast reinforcement learning via slow reinforcement learning. arXiv:1611.02779"},{"key":"4911_CR8","unstructured":"Fakoor R, Chaudhari P, Soatto S, et\u00a0al (2019) Meta-q-learning. In: ICLR 2019: Proceedings of the Seventh International Conference on Learning Representations"},{"key":"4911_CR9","unstructured":"Fard M, Pineau J (2010) Pac-bayesian model selection for reinforcement learning. Adv Neural Inf Process Syst 23"},{"key":"4911_CR10","unstructured":"Fard MM, Pineau J, Szepesv\u00e1ri C (2011) Pac-bayesian policy evaluation for reinforcement learning. In: Proceedings of the Twenty-Seventh Conference on Uncertainty in Artificial Intelligence, pp 195\u2013202"},{"key":"4911_CR11","unstructured":"Finn C, Levine S (2019) Meta-learning: from few-shot learning to rapid reinforcement learning. In: ICML"},{"key":"4911_CR12","unstructured":"Finn C, Abbeel P, Levine S (2017) Model-agnostic meta-learning for fast adaptation of deep networks. In: International conference on machine learning, PMLR, pp 1126\u20131135"},{"key":"4911_CR13","first-page":"20,132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto S, Gu SS (2021) A minimalist approach to offline reinforcement learning. Adv Neural Inf Process Syst 34:20,132-20,145","journal-title":"Adv Neural Inf Process Syst"},{"key":"4911_CR14","unstructured":"Fujimoto S, Hoof H, Meger D (2018a) Addressing function approximation error in actor-critic methods. In: International conference on machine learning, PMLR, pp 1587\u20131596"},{"key":"4911_CR15","unstructured":"Fujimoto S, Hoof H, Meger D (2018b) Addressing function approximation error in actor-critic methods. In: International conference on machine learning, PMLR, pp 1587\u20131596"},{"key":"4911_CR16","doi-asserted-by":"crossref","unstructured":"Germain P, Lacasse A, Laviolette F, et\u00a0al (2009) Pac-bayesian learning of linear classifiers. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp 353\u2013360","DOI":"10.1145\/1553374.1553419"},{"key":"4911_CR17","unstructured":"Guan J, Lu Z (2022) Fast-rate pac-bayesian generalization bounds for meta-learning. In: International Conference on Machine Learning, PMLR, pp 7930\u20137948"},{"issue":"9","key":"4911_CR18","doi-asserted-by":"publisher","first-page":"5572","DOI":"10.1109\/TPAMI.2021.3069005","volume":"44","author":"S Guo","year":"2021","unstructured":"Guo S, Yan Q, Su X et al (2021) State-temporal compression in reinforcement learning with the reward-restricted geodesic metric. IEEE Trans Pattern Anal Mach Intell 44(9):5572\u20135589","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4911_CR19","unstructured":"Haarnoja T, Zhou A, Abbeel P, et\u00a0al (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International conference on machine learning, PMLR, pp 1861\u20131870"},{"key":"4911_CR20","doi-asserted-by":"crossref","unstructured":"Hoeffding W (1994) Probability inequalities for sums of bounded random variables. In: The collected works of Wassily Hoeffding. Springer, p 409\u2013426","DOI":"10.1007\/978-1-4612-0865-5_26"},{"issue":"103","key":"4911_CR21","first-page":"811","volume":"314","author":"KC Hsu","year":"2023","unstructured":"Hsu KC, Ren AZ, Nguyen DP et al (2023) Sim-to-lab-to-real: safe reinforcement learning with shielding and generalization guarantees. Artif Intell 314(103):811","journal-title":"Artif Intell"},{"key":"4911_CR22","unstructured":"Huang B, Feng F, Lu C, et\u00a0al (2021) Adarl: What, where, and how to adapt in transfer reinforcement learning. In: International Conference on Learning Representations"},{"key":"4911_CR23","unstructured":"Humplik J, Galashov A, Hasenclever L, et\u00a0al (2019) Meta reinforcement learning as task inference. arXiv:1905.06424"},{"issue":"3","key":"4911_CR24","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1002\/rsa.20008","volume":"24","author":"S Janson","year":"2004","unstructured":"Janson S (2004) Large deviations for sums of partly dependent random variables. Random Structures & Algorithms 24(3):234\u2013248","journal-title":"Random Structures & Algorithms"},{"key":"4911_CR25","first-page":"439","volume":"15","author":"J Langford","year":"2002","unstructured":"Langford J, Shawe-Taylor J (2002) Pac-bayes & margins. Adv Neural Inf Process Syst 15:439\u2013446","journal-title":"Adv Neural Inf Process Syst"},{"key":"4911_CR26","unstructured":"Lee DD, Pham P, Largman Y, et\u00a0al (2009) Advances in neural information processing systems 22. Tech. rep., Tech. Rep., Tech. Rep"},{"key":"4911_CR27","unstructured":"Levine S, Kumar A, Tucker G, et\u00a0al (2020) Offline reinforcement learning: Tutorial, review. and Perspectives on Open Problems"},{"key":"4911_CR28","unstructured":"Li J, Vuong Q, Liu S, et\u00a0al (2020a) Multi-task batch reinforcement learning with metric learning. In: Larochelle H, Ranzato M, Hadsell R, et\u00a0al (eds) Advances in Neural Information Processing Systems, vol\u00a033. Curran Associates, Inc., pp 6197\u20136210"},{"key":"4911_CR29","unstructured":"Li L, Yang R, Luo D (2020b) Focal: Efficient fully-offline meta-reinforcement learning via distance metric learning and behavior regularization. In: International Conference on Learning Representations"},{"key":"4911_CR30","first-page":"10,161","volume":"33","author":"Z Lin","year":"2020","unstructured":"Lin Z, Thomas G, Yang G et al (2020) Model-based adversarial meta-reinforcement learning. Adv Neural Inf Process Syst 33:10,161-10,173","journal-title":"Adv Neural Inf Process Syst"},{"issue":"1","key":"4911_CR31","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1016\/j.irbm.2020.12.002","volume":"43","author":"T Liu","year":"2022","unstructured":"Liu T, Huang J, Liao T et al (2022) A hybrid deep learning model for predicting molecular subtypes of human breast cancer using multimodal data. Irbm 43(1):62\u201374","journal-title":"Irbm"},{"issue":"2\u20133","key":"4911_CR32","doi-asserted-by":"publisher","first-page":"574","DOI":"10.1177\/0278364920959444","volume":"40","author":"A Majumdar","year":"2021","unstructured":"Majumdar A, Farid A, Sonar A (2021) Pac-bayes control: learning policies that provably generalize to novel environments. Int J Robot Res 40(2\u20133):574\u2013593","journal-title":"Int J Robot Res"},{"key":"4911_CR33","doi-asserted-by":"crossref","unstructured":"McAllester DA (1999a) Pac-bayesian model averaging. In: Proceedings of the twelfth annual conference on Computational learning theory. Citeseer, pp 164\u2013170","DOI":"10.1145\/307400.307435"},{"issue":"3","key":"4911_CR34","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1023\/A:1007618624809","volume":"37","author":"DA McAllester","year":"1999","unstructured":"McAllester DA (1999) Some pac-bayesian theorems. Mach Learn 37(3):355\u2013363","journal-title":"Mach Learn"},{"key":"4911_CR35","unstructured":"Mitchell E, Rafailov R, Peng XB, et\u00a0al (2021a) Offline meta-reinforcement learning with advantage weighting. In: International Conference on Machine Learning, PMLR, pp 7780\u20137791"},{"key":"4911_CR36","unstructured":"Mitchell E, Rafailov R, Peng XB, et\u00a0al (2021b) Offline meta-reinforcement learning with advantage weighting. In: International Conference on Machine Learning, PMLR, pp 7780\u20137791"},{"issue":"7540","key":"4911_CR37","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"4911_CR38","unstructured":"Mnih V, Badia AP, Mirza M, et\u00a0al (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, PMLR, pp 1928\u20131937"},{"issue":"4","key":"4911_CR39","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/j.irbm.2021.10.003","volume":"43","author":"D Mubarak","year":"2022","unstructured":"Mubarak D et al (2022) Classification of early stages of esophageal cancer using transfer learning. IRBM 43(4):251\u2013258","journal-title":"IRBM"},{"key":"4911_CR40","unstructured":"Nagabandi A, Clavera I, Liu S, et\u00a0al (2019) Learning to adapt in dynamic, real-world environments through meta-reinforcement learning. In: ICLR 2019: Proceedings of the Seventh International Conference on Learning Representations"},{"key":"4911_CR41","unstructured":"Neyshabur B, Bhojanapalli S, McAllester D, et\u00a0al (2017a) Exploring generalization in deep learning. Adv Neural Inf Process Syst 30"},{"key":"4911_CR42","unstructured":"Neyshabur B, Bhojanapalli S, Srebro N (2017b) A pac-bayesian approach to spectrally-normalized margin bounds for neural networks. In: International Conference on Learning Representations"},{"key":"4911_CR43","first-page":"1540","volume":"28","author":"A Pentina","year":"2015","unstructured":"Pentina A, Lampert CH (2015) Lifelong learning with non-iid tasks. Adv Neural Inf Process Syst 28:1540\u20131548","journal-title":"Adv Neural Inf Process Syst"},{"key":"4911_CR44","unstructured":"Pong VH, Nair AV, Smith LM, et\u00a0al (2022) Offline meta-reinforcement learning with online self-supervision. In: International Conference on Machine Learning, PMLR, pp 17,811\u201317,829"},{"issue":"4","key":"4911_CR45","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1016\/j.irbm.2020.05.005","volume":"42","author":"MM Rahman","year":"2021","unstructured":"Rahman MM, Ghasemi Y, Suley E et al (2021) Machine learning based computer aided diagnosis of breast cancer utilizing anthropometric and clinical features. Irbm 42(4):215\u2013226","journal-title":"Irbm"},{"key":"4911_CR46","doi-asserted-by":"crossref","unstructured":"Rajasenbagam T, Jeyanthi S, Pandian JA (2021) Detection of pneumonia infection in lungs from chest x-ray images using deep convolutional neural network and content-based image retrieval techniques. J Ambient Intell Human Comput pp 1\u20138","DOI":"10.1007\/s12652-021-03075-2"},{"key":"4911_CR47","unstructured":"Rakelly K, Zhou A, Finn C, et\u00a0al (2019) Efficient off-policy meta-reinforcement learning via probabilistic context variables. In: International conference on machine learning, PMLR, pp 5331\u20135340"},{"key":"4911_CR48","unstructured":"Ralaivola L, Szafranski M, Stempfel G (2009) Chromatic pac-bayes bounds for non-iid data. In: Artificial Intelligence and Statistics, PMLR, pp 416\u2013423"},{"issue":"65","key":"4911_CR49","first-page":"1927","volume":"11","author":"L Ralaivola","year":"2010","unstructured":"Ralaivola L, Szafranski M, Stempfel G (2010) Chromatic pac-bayes bounds for non-iid data: Applications to ranking and stationary $$\\beta $$-mixing processes. J Mach Learn Res 11(65):1927\u20131956","journal-title":"J Mach Learn Res"},{"key":"4911_CR50","unstructured":"Rezazadeh A (2022) A unified view on pac-bayes bounds for meta-learning. In: International Conference on Machine Learning, PMLR, pp 18,576\u201318,595"},{"key":"4911_CR51","unstructured":"Rothfuss J, Lee D, Clavera I, et\u00a0al (2019) Promp: Proximal meta-policy search. In: ICLR 2019: Proceedings of the Seventh International Conference on Learning Representations"},{"key":"4911_CR52","unstructured":"Rothfuss J, Fortuin V, Josifoski M, et\u00a0al (2021) Pacoh: Bayes-optimal meta-learning with pac-guarantees. In: International Conference on Machine Learning, PMLR, pp 9116\u20139126"},{"key":"4911_CR53","unstructured":"Scheinerman ER, Ullman DH (2011) Fractional graph theory: a rational approach to the theory of graphs. Courier Corp"},{"issue":"oct","key":"4911_CR54","first-page":"233","volume":"3","author":"M Seeger","year":"2002","unstructured":"Seeger M (2002) Pac-bayesian generalisation error bounds for gaussian process classification. J Mach Learn Res 3(oct):233\u2013269","journal-title":"J Mach Learn Res"},{"key":"4911_CR55","doi-asserted-by":"crossref","unstructured":"Shawe-Taylor J, Williamson RC (1997) A pac analysis of a bayesian estimator. In: Proceedings of the tenth annual conference on Computational learning theory, pp 2\u20139","DOI":"10.1145\/267460.267466"},{"key":"4911_CR56","doi-asserted-by":"crossref","unstructured":"Sutton RS, Barto AG (1998) Introduction to Reinforcement Learning. MIT Press","DOI":"10.1016\/S1474-6670(17)38315-5"},{"key":"4911_CR57","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: An introduction. MIT press"},{"key":"4911_CR58","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) Mujoco: A physics engine for model-based control. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems, IEEE, pp 5026\u20135033","DOI":"10.1109\/IROS.2012.6386109"},{"key":"4911_CR59","unstructured":"Wang H, Zheng S, Xiong C, et\u00a0al (2019) On the generalization gap in reparameterizable reinforcement learning. In: International Conference on Machine Learning, PMLR, pp 6648\u20136658"},{"issue":"3","key":"4911_CR60","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach learn 8(3):229\u2013256","journal-title":"Mach learn"},{"key":"4911_CR61","unstructured":"Yang Y, Caluwaerts K, Iscen A, et\u00a0al (2019) Norml: No-reward meta learning. In: Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems, pp 323\u2013331"},{"key":"4911_CR62","unstructured":"Yu T, Quillen D, He Z, et\u00a0al (2020) Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning. In: Kaelbling LP, Kragic D, Sugiura K (eds) Proceedings of the Conference on Robot Learning, Proceedings of Machine Learning Research, vol 100. PMLR, pp 1094\u20131100"},{"key":"4911_CR63","doi-asserted-by":"crossref","unstructured":"Zhang T, Guo S, Tan T, et\u00a0al (2022) Adjacency constraint for efficient hierarchical reinforcement learning. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2022.3192418"},{"key":"4911_CR64","unstructured":"Zintgraf L, Shiarlis K, Igl M, et\u00a0al (2020) Varibad: A very good method for bayes-adaptive deep rl via meta-learning. In: ICLR 2020: Proceedings of the Eighth International Conference on Learning Representations"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04911-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-023-04911-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-023-04911-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,25]],"date-time":"2023-10-25T23:13:36Z","timestamp":1698275616000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-023-04911-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,2]]},"references-count":64,"journal-issue":{"issue":"22","published-print":{"date-parts":[[2023,11]]}},"alternative-id":["4911"],"URL":"https:\/\/doi.org\/10.1007\/s10489-023-04911-y","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9,2]]},"assertion":[{"value":"22 July 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 September 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declared that they had no conflicts of interest with this work","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}