{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T09:48:19Z","timestamp":1758361699838,"version":"3.44.0"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T00:00:00Z","timestamp":1741996800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T00:00:00Z","timestamp":1741996800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"China National Key R&D Research Program","award":["2020YFB1711200","2019YFB1705801"],"award-info":[{"award-number":["2020YFB1711200","2019YFB1705801"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s10489-025-06303-w","type":"journal-article","created":{"date-parts":[[2025,3,15]],"date-time":"2025-03-15T00:44:46Z","timestamp":1741999486000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Uncertainty weighted policy optimization based on Bayesian approximation"],"prefix":"10.1007","volume":"55","author":[{"given":"Tianyi","family":"Li","sequence":"first","affiliation":[]},{"given":"Genke","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Chu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,15]]},"reference":[{"key":"6303_CR1","doi-asserted-by":"crossref","unstructured":"Zhu Z, Lin K, Jain AK, Zhou J (2023) Transfer learning in deep reinforcement learning: A survey. IEEE Trans Pattern Anal Mach Intell 45(11):13344\u201313362. https:\/\/doi.org\/10.1109\/TPAMI.2023.3292075","DOI":"10.1109\/TPAMI.2023.3292075"},{"key":"6303_CR2","doi-asserted-by":"crossref","unstructured":"Zhang G, Kashima H (2024) Learning state importance for preference-based reinforcement learning. Mach Learn 113(4):1885\u20131901. https:\/\/doi.org\/10.1007\/s10994-022-06295-5","DOI":"10.1007\/s10994-022-06295-5"},{"key":"6303_CR3","doi-asserted-by":"crossref","unstructured":"Chen X et al (2023) Leveraging deep learning for automatic literature screening in intelligent bibliometrics. Intl J Mach Learn Cybern 14(4):1483\u20131525. https:\/\/doi.org\/10.1007\/s13042-022-01710-8","DOI":"10.1007\/s13042-022-01710-8"},{"key":"6303_CR4","doi-asserted-by":"crossref","unstructured":"Ghodhbani R, Saidani T, Zayeni H (2023) Deploying deep learning networks based advanced techniques for image processing on FPGA platform. Neural Comput Appl 35(26):18949\u201318969. https:\/\/doi.org\/10.1007\/s00521-023-08718-3","DOI":"10.1007\/s00521-023-08718-3"},{"key":"6303_CR5","doi-asserted-by":"crossref","unstructured":"Fan D, Shen H, Dong L (2023) Twin attentive deep reinforcement learning for multi-agent defensive convoy. Intl J Mach Learn Cybern 14(6):2239\u20132250. https:\/\/doi.org\/10.1007\/s13042-022-01759-5","DOI":"10.1007\/s13042-022-01759-5"},{"key":"6303_CR6","doi-asserted-by":"crossref","unstructured":"Hu W, Wang H, He M, Wang N (2023) Uncertainty-aware hierarchical reinforcement learning for long-horizon tasks. Appl Intell 53(23):28555\u201328569. https:\/\/doi.org\/10.1007\/s10489-023-05022-4","DOI":"10.1007\/s10489-023-05022-4"},{"key":"6303_CR7","doi-asserted-by":"crossref","unstructured":"Schrittwieser J et al (2020) Mastering Atari, Go, chess and shogi by planning with a learned model. Nature 588:604\u2013609. https:\/\/doi.org\/10.1038\/s41586-020-03051-4","DOI":"10.1038\/s41586-020-03051-4"},{"key":"6303_CR8","doi-asserted-by":"crossref","unstructured":"Liu R, Pang Z, Meng Z, Wang W, Yu Y, Lu T (2022) On efficient reinforcement learning for full-length game of StarCraft II. J Artif Intell Res 75:213\u2013260. https:\/\/doi.org\/10.1613\/jair.1.13743","DOI":"10.1613\/jair.1.13743"},{"key":"6303_CR9","doi-asserted-by":"crossref","unstructured":"Ecoffet A, Huizinga J, Lehman J, Stanley KO, Clune J (2021) First return, then explore. Nature 590(7847):580\u2013586. https:\/\/doi.org\/10.1038\/s41586-020-03157-9","DOI":"10.1038\/s41586-020-03157-9"},{"key":"6303_CR10","doi-asserted-by":"crossref","unstructured":"Lei Y et al, New challenges in reinforcement learning: a survey of security and privacy. Artif Intell Rev 56(7):7195\u20137236. https:\/\/doi.org\/10.1007\/s10462-022-10348-5","DOI":"10.1007\/s10462-022-10348-5"},{"key":"6303_CR11","doi-asserted-by":"crossref","unstructured":"Wu P, Luo S, Tian L, Mao B, Chen W (2024) Consistent epistemic planning for multiagent deep reinforcement learning. Intl J Mach Learn Cybern 15(5):1663\u20131675. https:\/\/doi.org\/10.1007\/s13042-023-01989-1","DOI":"10.1007\/s13042-023-01989-1"},{"key":"6303_CR12","doi-asserted-by":"crossref","unstructured":"Gong Y, Xiong H, Li M, Wang H, Nian X (2023) Reinforcement learning for multi-agent formation navigation with scalability. Appl Intell 53(23):28207\u201328225. https:\/\/doi.org\/10.1007\/s10489-023-05007-3","DOI":"10.1007\/s10489-023-05007-3"},{"key":"6303_CR13","doi-asserted-by":"crossref","unstructured":"Wu Z, Chen C, Huang S (2022) Poisoning attacks against knowledge graph-based recommendation systems using deep reinforcement learning. Neural Comput Appl 34(4):3097\u20133115. https:\/\/doi.org\/10.1007\/s00521-021-06573-8","DOI":"10.1007\/s00521-021-06573-8"},{"key":"6303_CR14","doi-asserted-by":"crossref","unstructured":"Yu C, Chen M, Lin H (2023) Learning key steps to attack deep reinforcement learning agents. Mach Learn 112(5):1499\u20131522. https:\/\/doi.org\/10.1007\/s10994-023-06318-9","DOI":"10.1007\/s10994-023-06318-9"},{"key":"6303_CR15","doi-asserted-by":"crossref","unstructured":"Sun Q, Si Y (2023) Supervised actor-critic reinforcement learning with action feedback for algorithmic trading. Appl Intell 53(13):16875\u201316892. https:\/\/doi.org\/10.1007\/s10489-022-04322-5","DOI":"10.1007\/s10489-022-04322-5"},{"key":"6303_CR16","doi-asserted-by":"crossref","unstructured":"Garaffa LC, Basso M, Konzen AA, de Freitas EP (2023) Reinforcement learning for mobile robotics exploration: A survey. IEEE Trans Neural Netw Learn Syst 34(8):3796\u20133810. https:\/\/doi.org\/10.1109\/TNNLS.2021.3124466","DOI":"10.1109\/TNNLS.2021.3124466"},{"key":"6303_CR17","doi-asserted-by":"crossref","unstructured":"Qu X, Ong Y, Gupta A (2022) Frame-correlation transfers trigger economical attacks on deep reinforcement learning policies. IEEE Trans Cybern 52(8):7577\u20137590. https:\/\/doi.org\/10.1109\/TCYB.2020.3041265","DOI":"10.1109\/TCYB.2020.3041265"},{"key":"6303_CR18","doi-asserted-by":"crossref","unstructured":"Wang S, Yang R, Li B, Kan Z (2023) Structural parameter space exploration for reinforcement learning via a matrix variate distribution. IEEE Trans Emerg Topic Comp Intell 7(4):1025\u20131035. https:\/\/doi.org\/10.1109\/TETCI.2022.3140380","DOI":"10.1109\/TETCI.2022.3140380"},{"key":"6303_CR19","doi-asserted-by":"crossref","unstructured":"Mourgias-Alexandris G et al Noise-resilient and high-speed deep learning with coherent silicon photonics. Nat Commun 13. https:\/\/doi.org\/10.1038\/s41467-022-33259-z","DOI":"10.1038\/s41467-022-33259-z"},{"key":"6303_CR20","doi-asserted-by":"crossref","unstructured":"Zhang D et al (2024) An improved soft actor-critic-based energy management strategy of fuel cell hybrid vehicles with a nonlinear fuel cell degradation model. Int J Precis Eng Manuf Green Technol 11(1):183-202. https:\/\/doi.org\/10.1007\/s40684-023-00547-y","DOI":"10.1007\/s40684-023-00547-y"},{"key":"6303_CR21","doi-asserted-by":"crossref","unstructured":"Yang Q, Sim\u00e3o TD, Tindemans SH, Spaan TJ (2023) Safety-constrained reinforcement learning with a distributional safety critic. Mach Learn 112(3):859\u2013887. https:\/\/doi.org\/10.1007\/s10994-022-06187-8","DOI":"10.1007\/s10994-022-06187-8"},{"key":"6303_CR22","doi-asserted-by":"crossref","unstructured":"Xu Y, Wei Y, Jiang K, Chen L, Wang D, Deng H (2023) Action decoupled SAC reinforcement learning with discrete-continuous hybrid action spaces. Neurocomputing 537:141\u2013151. https:\/\/doi.org\/10.1016\/j.neucom.2023.03.054","DOI":"10.1016\/j.neucom.2023.03.054"},{"key":"6303_CR23","doi-asserted-by":"crossref","unstructured":"Gong X, Yu J, L\u00fc S, Lu H (2022) Actor-critic with familiarity-based trajectory experience replay. Inf Sci 582:633-647. https:\/\/doi.org\/10.1016\/j.ins.2021.10.031","DOI":"10.1016\/j.ins.2021.10.031"},{"key":"6303_CR24","unstructured":"Mavor-Parker A, Young K, Barry C, Griffin L (2022) How to stay curious while avoiding noisy TVs using aleatoric uncertainty estimation. In: Proc mach learn res 162:15220\u201315240"},{"key":"6303_CR25","unstructured":"Ratzlaff N, Bai Q, Fuxin L, Xu W (2020) Implicit generative modeling for efficient exploration. In: Proc Int Conf Mach Learn pp 7985\u20137995"},{"key":"6303_CR26","doi-asserted-by":"crossref","unstructured":"Bonnet D et al (2023) Bringing uncertainty quantification to the extreme-edge with memristor-based Bayesian neural networks. Nat Commun 14(1):7530. https:\/\/doi.org\/10.1038\/s41467-023-43317-9","DOI":"10.1038\/s41467-023-43317-9"},{"key":"6303_CR27","doi-asserted-by":"crossref","unstructured":"Magris M, Iosifidis A (2023) Bayesian learning for neural networks: an algorithmic survey. Artif Intell Rev 56(10):11773\u201311823. https:\/\/doi.org\/10.1007\/s10462-023-10443-1","DOI":"10.1007\/s10462-023-10443-1"},{"key":"6303_CR28","doi-asserted-by":"crossref","unstructured":"Pacelli R, Ariosto S, Pastore M, Ginelli F, Gherardi M, Rotondo P (2023) A statistical mechanics framework for Bayesian deep neural networks beyond the infinite-width limit. Nat Mach Intell 5(12):1497\u20131507. https:\/\/doi.org\/10.1038\/s42256-023-00767-6","DOI":"10.1038\/s42256-023-00767-6"},{"key":"6303_CR29","doi-asserted-by":"crossref","unstructured":"Wang X, Li T, Cheng Y, Chen CLP (2022) Inference-based posteriori parameter distribution optimization. IEEE Trans Cybern 52(5):3006\u20133017. https:\/\/doi.org\/10.1109\/TCYB.2020.3023127","DOI":"10.1109\/TCYB.2020.3023127"},{"key":"6303_CR30","doi-asserted-by":"crossref","unstructured":"Wu G, Domke J, Sanner S (2022) Arbitrary conditional inference in variational autoencoders via fast prior network training. Mach Learn 111(7):2537\u20132559. https:\/\/doi.org\/10.1007\/s10994-022-06171-2","DOI":"10.1007\/s10994-022-06171-2"},{"key":"6303_CR31","unstructured":"Li Z, Li Y, Zhang Y, Zhang T, Luo ZQ (2022) HyperDQN: A randomized exploration method for deep reinforcement learning. In: Proc int conf learn represent"},{"key":"6303_CR32","doi-asserted-by":"crossref","unstructured":"Li T, Yang G, Chu J (2024) Implicit posteriori parameter distribution optimization in reinforcement learning. IEEE Trans Cybern 54(5):3051\u20133064. https:\/\/doi.org\/10.1109\/TCYB.2023.3254596","DOI":"10.1109\/TCYB.2023.3254596"},{"key":"6303_CR33","unstructured":"Peer O, Tessler C, Merlis N, Meir R (2021) Ensemble bootstrapping for Q-learning. In: Proc int conf mach learn pp 8454\u20138463"},{"key":"6303_CR34","unstructured":"Hiraoka T, Imagawa T, Hashimoto T, Onishi T, Tsuruoka YY (2022) Dropout Q-functions for doubly efficient reinforcement learning. In: Proc int conf learn represent"},{"key":"6303_CR35","unstructured":"Gal Y, Ghahramani Z (2016) Dropout as a Bayesian approximation: representing model uncertainty in deep learning. In: Proc int conf mach learn pp 1651\u20131660"},{"key":"6303_CR36","doi-asserted-by":"crossref","unstructured":"Wang Q, Wang S, S, Wang B (2023) Class-rebalanced wasserstein distance for multi-source domain adaptation. Appl Intell 53(7):8024\u20138038. https:\/\/doi.org\/10.1007\/s10489-022-03810-y","DOI":"10.1007\/s10489-022-03810-y"},{"key":"6303_CR37","doi-asserted-by":"crossref","unstructured":"Zhang X, Liu Y, Xu X, Huang Q, Mao H, Carie A (2021) Structural relational inference actor-critic for multi-agent reinforcement learning. Neurocomputing 459:383\u2013394. https:\/\/doi.org\/10.1016\/j.neucom.2021.07.014","DOI":"10.1016\/j.neucom.2021.07.014"},{"key":"6303_CR38","unstructured":"Mai V, Mani K, Paull L (2022) Sample efficient deep reinforcement learning via uncertainty estimation. In: Proc int conf learn represent"},{"key":"6303_CR39","doi-asserted-by":"crossref","unstructured":"Raziei Z, Moghaddam M (2021) Adaptable automation with modular deep reinforcement learning and policy transfer. Eng Appl Artif Intell 103. https:\/\/doi.org\/10.1016\/j.engappai.2021.104296","DOI":"10.1016\/j.engappai.2021.104296"},{"key":"6303_CR40","unstructured":"Fujimoto S, Gu SS (2021) A minimalist approach to offline reinforcement learning. In: Adv neural inf proces syst 34:20132\u201320145"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06303-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-025-06303-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-025-06303-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T19:33:17Z","timestamp":1758310397000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-025-06303-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,15]]},"references-count":40,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["6303"],"URL":"https:\/\/doi.org\/10.1007\/s10489-025-06303-w","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2025,3,15]]},"assertion":[{"value":"28 January 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 March 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"The research not involving human participants or animals.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}}],"article-number":"532"}}