{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T08:39:12Z","timestamp":1770194352613,"version":"3.49.0"},"reference-count":55,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176027"],"award-info":[{"award-number":["62176027"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Chongqing Talent","award":["cstc2024ycjh-bgzxm0082"],"award-info":[{"award-number":["cstc2024ycjh-bgzxm0082"]}]},{"name":"Central University Operating Expenses","award":["2024CDJGF-044"],"award-info":[{"award-number":["2024CDJGF-044"]}]},{"name":"Chongqing New YC Project","award":["CSTB2024YCJH-KYXM0126"],"award-info":[{"award-number":["CSTB2024YCJH-KYXM0126"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/tai.2025.3592174","type":"journal-article","created":{"date-parts":[[2025,7,24]],"date-time":"2025-07-24T17:58:32Z","timestamp":1753379912000},"page":"1118-1130","source":"Crossref","is-referenced-by-count":0,"title":["Balanced Sampling and Reusing Imaginary Data for World Models in Reinforcement Learning"],"prefix":"10.1109","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-3231-9743","authenticated-orcid":false,"given":"Qianyu","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3761-1759","authenticated-orcid":false,"given":"Xuekai","family":"Wei","sequence":"additional","affiliation":[{"name":"School of Computer Science, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8342-7453","authenticated-orcid":false,"given":"Jielu","family":"Yan","sequence":"additional","affiliation":[{"name":"School of Computer Science, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5135-5165","authenticated-orcid":false,"given":"Leong","family":"Hou U","sequence":"additional","affiliation":[{"name":"Department of Computer and Information Science, State Key Laboratory of Internet of Things for Smart City, Centre for Data Science, University of Macau, Macau, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9830-3955","authenticated-orcid":false,"given":"Huayan","family":"Pu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Mechanical Transmissions, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1314-5631","authenticated-orcid":false,"given":"Jun","family":"Luo","sequence":"additional","affiliation":[{"name":"State Key Laboratory 
of Mechanical Transmissions, Chongqing University, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1000-3937","authenticated-orcid":false,"given":"Weijia","family":"Jia","sequence":"additional","affiliation":[{"name":"BNU-UIC Institute of Artificial Intelligence and Future Networks, Beijing Normal University and Guangdong Key Laboratory of AI Multi-Modal Data Processing, BNU-HKBU United International College, Zhuhai, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1874-3641","authenticated-orcid":false,"given":"Mingliang","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Computer Science, Chongqing University, Chongqing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2022.3225256"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2021.3087666"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1561\/2200000071"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2024.3379969"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2023.3268612"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref9","article-title":"When to use parametric models in reinforcement learning?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Van Hasselt","year":"2019"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.2307\/j.ctt4cgngj.10"},{"key":"ref11","article-title":"Image augmentation is all you need: Regularizing deep reinforcement learning from pixels","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yarats","year":"2021"},{"key":"ref12","first-page":"19884","article-title":"Reinforcement learning with augmented data","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Laskin","year":"2020"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"ref14","article-title":"Data-efficient reinforcement learning with self-predictive representations","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Schwarzer","year":"2021"},{"issue":"1","key":"ref15","article-title":"A path towards autonomous machine intelligence version 0.9","volume":"62","author":"LeCun","year":"2022","journal-title":"Open Rev."},{"key":"ref16","first-page":"25476","article-title":"Mastering Atari games with limited data","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Ye","year":"2021"},{"key":"ref17","article-title":"Model based reinforcement learning for Atari","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kaiser","year":"2020"},{"key":"ref18","article-title":"Transformers are sample-efficient world models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Micheli","year":"2023"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref20","article-title":"Mastering diverse domains through world models","author":"Hafner","year":"2023"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1406.1078"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref23","article-title":"Transformer-based world models are happy with 100k interactions","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Robine","year":"2023"},{"key":"ref24","article-title":"Transdreamer: Reinforcement learning with transformer world models","volume-title":"Deep RL Workshop NeurIPS","author":"Chen","year":"2021"},{"key":"ref25","article-title":"Storm: Efficient stochastic transformer based world models for reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Zhang","year":"2024"},{"key":"ref26","first-page":"3042","article-title":"Revisiting fundamentals of experience replay","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Fedus","year":"2020"},{"key":"ref27","first-page":"16828","article-title":"The primacy bias in deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"162","author":"Nikishin","year":"2022"},{"key":"ref28","article-title":"Sample-efficient reinforcement learning by breaking the replay ratio barrier","volume-title":"Proc. Int. Conf, Learn. Representations","author":"D\u2019Oro","year":"2023"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1203"},{"key":"ref30","article-title":"Recurrent world models facilitate policy evolution","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Ha","year":"2018"},{"key":"ref31","article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hafner","year":"2019"},{"key":"ref32","article-title":"Mastering Atari with discrete world models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hafner","year":"2020"},{"key":"ref33","first-page":"7487","article-title":"Stabilizing transformers for reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Parisotto","year":"2020"},{"key":"ref34","article-title":"Learning to play Atari in a world of tokens","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Agarwal","year":"2024"},{"key":"ref35","article-title":"Improving token-based world models with parallel observation prediction","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Cohen","year":"2024"},{"key":"ref36","article-title":"Retentive network: A successor to transformer for large language models","author":"Sun","year":"2023"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref38","volume-title":"Reinforcement Learning for Robots Using Neural Networks.","author":"Lin","year":"1992"},{"key":"ref39","article-title":"Prioritized experience replay","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Schaul","year":"2016"},{"key":"ref40","article-title":"Deep learning scaling is predictable, empirically","author":"Hestness","year":"2017"},{"key":"ref41","article-title":"Scaling laws for neural language models","author":"Kaplan","year":"2020"},{"key":"ref42","first-page":"30016","article-title":"Training compute-optimal large language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hoffmann","year":"2022"},{"key":"ref43","first-page":"27921","article-title":"Multi-game decision transformers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Lee","year":"2022"},{"key":"ref44","first-page":"313","article-title":"How to learn a useful critic? Model-based action-gradient-estimator policy optimization","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"33","author":"D\u2019Oro","year":"2020"},{"key":"ref45","article-title":"Implicit under-parameterization inhibits data-efficient deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kumar","year":"2020"},{"key":"ref46","article-title":"Dropout q-functions for doubly efficient reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hiraoka","year":"2021"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.056"},{"key":"ref48","article-title":"Sample efficient actor-critic with experience replay","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2022"},{"key":"ref49","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref51","article-title":"Auto-encoding variational bayes","volume-title":"Proc. Int. Conf. Learn. Representations, ICLR\u2013Conf. Track Proc.","author":"Kingma","year":"2014"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1080\/09540099108946587"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-2649-2"},{"key":"ref54","first-page":"104","article-title":"An optimistic perspective on offline reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Agarwal","year":"2020"},{"key":"ref55","first-page":"29304","article-title":"Deep reinforcement learning at the edge of the statistical precipice","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Agarwal","year":"2021"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/9078688\/11370309\/11095863.pdf?arnumber=11095863","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T20:57:54Z","timestamp":1770152274000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11095863\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":55,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tai.2025.3592174","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}