{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T07:36:30Z","timestamp":1773387390365,"version":"3.50.1"},"reference-count":30,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial 
Intelligence"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1016\/j.engappai.2026.114204","type":"journal-article","created":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T13:37:01Z","timestamp":1771594621000},"page":"114204","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Convergence-aware selective backup for efficient and scalable policy optimization"],"prefix":"10.1016","volume":"171","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9624-3231","authenticated-orcid":false,"given":"Hamid","family":"Taheri","sequence":"first","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.114204_bib1","doi-asserted-by":"crossref","DOI":"10.1007\/s00245-024-10103-y","article-title":"Dissipativity in infinite horizon optimal control and dynamic programming","volume":"89","author":"Angeli","year":"2024","journal-title":"Appl. Math. Optim."},{"key":"10.1016\/j.engappai.2026.114204_bib2","series-title":"Adv. Neural Inf. Process. Syst.","first-page":"4055","article-title":"Successor features for transfer in reinforcement learning","author":"Barreto","year":"2017"},{"key":"10.1016\/j.engappai.2026.114204_bib3","series-title":"AAAI Conf. Artif. Intell.","article-title":"Increasing the action gap: new operators for reinforcement learning","author":"Bellemare","year":"2016"},{"key":"10.1016\/j.engappai.2026.114204_bib4","series-title":"Dynamic Programming","author":"Bellman","year":"1957"},{"key":"10.1016\/j.engappai.2026.114204_bib5","series-title":"Lab. Inf. Decis. Syst. Rep. LIDS-P-234","article-title":"Temporal differences-based policy iteration and applications in neuro-dynamic programming","author":"Bertsekas","year":"1996"},{"key":"10.1016\/j.engappai.2026.114204_bib6","article-title":"On the linear convergence of approximate value iteration with smooth function approximation","author":"Bhandari","year":"2024","journal-title":"Oper. 
Res."},{"key":"10.1016\/j.engappai.2026.114204_bib7","series-title":"Int. Conf. Mach. Learn.","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"Duan","year":"2016"},{"key":"10.1016\/j.engappai.2026.114204_bib8","series-title":"Dynamic Programming and Markov Processes","author":"Howard","year":"1960"},{"key":"10.1016\/j.engappai.2026.114204_bib9","first-page":"456","article-title":"Benchmarking Continuous-Control Algorithms on {MuJoCo}: A Comprehensive Study of Reproducibility and Performance","volume":"35","author":"Hutter","year":"2024","journal-title":"IEEE Transact. Neural Networks Learn. Syst."},{"key":"10.1016\/j.engappai.2026.114204_bib10","first-page":"1","article-title":"A survey of deep reinforcement learning benchmarks and evaluation protocols","volume":"23","author":"Kumar","year":"2022","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.engappai.2026.114204_bib11","series-title":"Proc. 41st Int. Conf. Mach. Learn.","article-title":"Finite-time convergence of stochastic approximation for discounted markov decision processes","author":"Kumar","year":"2024"},{"key":"10.1016\/j.engappai.2026.114204_bib12","series-title":"Proc. 26th Int. Conf. Artif. Intell. Stat.","first-page":"987","article-title":"Provably efficient prioritized sweeping with a generative model","author":"Li","year":"2023"},{"key":"10.1016\/j.engappai.2026.114204_bib13","series-title":"Proc. 39th Int. Conf. Mach. Learn. (ICML 2022)","first-page":"15714","article-title":"The {Atari} 100K Benchmark: Evaluating Sample Efficiency in Deep Reinforcement Learning","author":"Machado","year":"2022"},{"key":"10.1016\/j.engappai.2026.114204_bib14","doi-asserted-by":"crossref","first-page":"103","DOI":"10.1023\/A:1022635613229","article-title":"Prioritized sweeping: reinforcement learning with less data and less real time","volume":"13","author":"Moore","year":"1993","journal-title":"Mach. 
Learn."},{"key":"10.1016\/j.engappai.2026.114204_bib15","series-title":"Proc. 20th Int. Conf. Mach. Learn.","first-page":"560","article-title":"Error bounds for approximate value iteration","author":"Munos","year":"2003"},{"key":"10.1016\/j.engappai.2026.114204_bib16","series-title":"Int. Conf. Auton","article-title":"Lazy value iteration: a simple and efficient algorithm for planning in MDPs","author":"Padakandla","year":"2021"},{"key":"10.1016\/j.engappai.2026.114204_bib17","series-title":"Benchmarking Deep Reinforcement-Learning Algorithms on {MuJoCo}: New Baselines and Reproducibility Guidelines","author":"Peng","year":"2023"},{"key":"10.1016\/j.engappai.2026.114204_bib18","series-title":"Approximate Dynamic Programming: Solving the Curses of Dimensionality","author":"Powell","year":"2011"},{"key":"10.1016\/j.engappai.2026.114204_bib19","series-title":"P.S. Foundation, Tracemalloc \u2014 Trace Memory Usage in Python","year":"2023"},{"key":"10.1016\/j.engappai.2026.114204_bib20","series-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman","year":"1994"},{"key":"10.1016\/j.engappai.2026.114204_bib21","first-page":"123","article-title":"Scaling Deep Reinforcement Learning on {Atari} with Massive Distributed Training","volume":"7","author":"Racani\u00e8re","year":"2025","journal-title":"Nat. Mach. Intell."},{"key":"10.1016\/j.engappai.2026.114204_bib22","series-title":"Int. Conf. Learn. Represent.","article-title":"Prioritized experience replay","author":"Schaul","year":"2016"},{"key":"10.1016\/j.engappai.2026.114204_bib23","first-page":"2451","article-title":"Prioritized experience replay for deep reinforcement learning: theory and applications","volume":"56","author":"Schaul","year":"2023","journal-title":"Artif. Intell. 
Rev."},{"key":"10.1016\/j.engappai.2026.114204_bib24","first-page":"1181","article-title":"Performance bounds for \u03bb-policy iteration and application to the game of Tetris","volume":"14","author":"Scherrer","year":"2013","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.engappai.2026.114204_bib25","series-title":"Reinforcement Learning: an Introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.engappai.2026.114204_bib26","first-page":"205","article-title":"Others, A survey of scalable and efficient reinforcement learning algorithms: from theory to practice, found","volume":"17","author":"Williams","year":"2024","journal-title":"Trends\u00a0in Mach. Learn."},{"key":"10.1016\/j.engappai.2026.114204_bib27","series-title":"Int. Conf. Mach. Learn.","article-title":"Deep dynamic programming for planning in large-scale stochastic control problems","author":"Yang","year":"2019"},{"key":"10.1016\/j.engappai.2026.114204_bib28","series-title":"Int. Conf. Learn. Represent.","article-title":"Others, on the bias-variance tradeoff of prioritized experience replay","author":"Zha","year":"2023"},{"key":"10.1016\/j.engappai.2026.114204_bib29","article-title":"Efficient value iteration using stochastic error bounds","author":"Zhang","year":"2022","journal-title":"IEEE Trans. Automat. Control"},{"key":"10.1016\/j.engappai.2026.114204_bib30","article-title":"Others, sample-efficient variance-reduced Q-Learning for discounted MDPs","volume":"36","author":"Zhang","year":"2023","journal-title":"Adv. Neural Inf. Process. 
Syst."}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626004859?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626004859?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T03:34:42Z","timestamp":1773372882000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626004859"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":30,"alternative-id":["S0952197626004859"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114204","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Convergence-aware selective backup for efficient and scalable policy optimization","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114204","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114204"}}