{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T06:02:05Z","timestamp":1773122525487,"version":"3.50.1"},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFF0905400"],"award-info":[{"award-number":["2023YFF0905400"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013141","name":"Jilin Provincial Key Research and Development Plan Project","doi-asserted-by":"publisher","award":["20240304200SF"],"award-info":[{"award-number":["20240304200SF"]}],"id":[{"id":"10.13039\/501100013141","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010909","name":"Excellent Young Scientists Fund","doi-asserted-by":"publisher","award":["62506142"],"award-info":[{"award-number":["62506142"]}],"id":[{"id":"10.13039\/501100010909","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U2341229"],"award-info":[{"award-number":["U2341229"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476110"],"award-info":[{"award-number":["62476110"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1016\/j.patcog.2025.112824","type":"journal-article","created":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T17:06:21Z","timestamp":1764867981000},"page":"112824","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["State transition difference prediction for deep reinforcement learning"],"prefix":"10.1016","volume":"173","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-3286-7444","authenticated-orcid":false,"given":"Haotian","family":"Chi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3958-8740","authenticated-orcid":false,"given":"Zhaogeng","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5685-8506","authenticated-orcid":false,"given":"Xing","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3192-8736","authenticated-orcid":false,"given":"Bohao","family":"Qu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8658-9447","authenticated-orcid":false,"given":"Jifeng","family":"Hu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4629-9901","authenticated-orcid":false,"given":"Yuan","family":"Jiang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7835-9556","authenticated-orcid":false,"given":"Hechang","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2697-8093","authenticated-orcid":false,"given":"Yi","family":"Chang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"7540","key":"10.1016\/j.patcog.2025.112824_bib0001","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"key":"10.1016\/j.patcog.2025.112824_bib0002","unstructured":"J. Schulman, F. Wolski, P. Dhariwal, A. Radford, O. Klimov, Proximal policy optimization algorithms, (2017) arXiv: 1707.06347."},{"key":"10.1016\/j.patcog.2025.112824_bib0003","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110576","article-title":"OA-pose: occlusion-aware monocular 6-DoF object pose estimation under geometry alignment for robot manipulation","volume":"154","author":"Wang","year":"2024","journal-title":"Pattern Recognit."},{"issue":"9","key":"10.1016\/j.patcog.2025.112824_bib0004","doi-asserted-by":"crossref","first-page":"4678","DOI":"10.1109\/TKDE.2024.3376745","article-title":"FELight: fairness-aware traffic signal control via sample-efficient reinforcement learning","volume":"36","author":"Du","year":"2024","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.patcog.2025.112824_bib0005","series-title":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, IJCAI-23","first-page":"4766","article-title":"Stockformer: learning hybrid trading machines with predictive coding","author":"Gao","year":"2023"},{"key":"10.1016\/j.patcog.2025.112824_bib0006","doi-asserted-by":"crossref","first-page":"379","DOI":"10.1016\/j.neunet.2018.07.006","article-title":"State representation learning for control: an overview","volume":"108","author":"Lesort","year":"2018","journal-title":"Neural Netw."},{"key":"10.1016\/j.patcog.2025.112824_bib0007","series-title":"4th International Conference on Learning Representations, ICLR","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2016"},{"key":"10.1016\/j.patcog.2025.112824_bib0008","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109917","article-title":"Robust multi-agent reinforcement learning via Bayesian distributional value estimation","volume":"145","author":"Du","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2025.112824_bib0009","unstructured":"T. Lesort, M. Seurin, X. Li, N. D\u00edaz-Rodr\u00edguez, D. Filliat, Unsupervised state representation learning with robotic priors: a robustness benchmark,(2017) arXiv: 1709.05185."},{"key":"10.1016\/j.patcog.2025.112824_bib0010","first-page":"2746","article-title":"Embed to control: a locally linear latent dynamics model for control from raw images","volume":"28","author":"Watter","year":"2015","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2025.112824_bib0011","first-page":"67565","article-title":"State sequences prediction via fourier transform for representation learning","volume":"36","author":"Ye","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2025.112824_bib0012","series-title":"2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"5026","article-title":"Mujoco: a physics engine for model-based control","author":"Todorov","year":"2012"},{"key":"10.1016\/j.patcog.2025.112824_bib0013","unstructured":"J. Fu, A. Kumar, O. Nachum, G. Tucker, S. Levine, D4rl: datasets for deep data-driven reinforcement learning,(2020) arXiv: 2004.07219."},{"key":"10.1016\/j.patcog.2025.112824_bib0014","series-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.patcog.2025.112824_bib0015","series-title":"International Conference on Machine Learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.patcog.2025.112824_bib0016","series-title":"9th International Conference on Learning Representations, ICLR","article-title":"Return-based contrastive representation learning for reinforcement learning","author":"Liu","year":"2021"},{"key":"10.1016\/j.patcog.2025.112824_bib0017","series-title":"International Conference on Machine Learning","first-page":"7424","article-title":"Can increasing input dimensionality improve deep reinforcement learning?","author":"Ota","year":"2020"},{"key":"10.1016\/j.patcog.2025.112824_bib0018","first-page":"61573","article-title":"For sale: state-action representation learning for deep reinforcement learning","volume":"36","author":"Fujimoto","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2025.112824_bib0019","doi-asserted-by":"crossref","DOI":"10.1109\/TNNLS.2025.3598928","article-title":"Continual diffuser (CoD): mastering continual offline RL with experience rehearsal","author":"Hu","year":"2025","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2025.112824_bib0020","unstructured":"J. Hu, S. Huang, Z. Yang, S. Hu, L. Shen, H. Chen, L. Sun, Y. Chang, D. Tao, Analytic energy-guided policy optimization for offline reinforcement learning,(2025b) arXiv: 2505.01822."},{"key":"10.1016\/j.patcog.2025.112824_bib0021","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110938","article-title":"Improving deep representation learning via auxiliary learnable target coding","volume":"157","author":"Liu","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2025.112824_bib0022","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111655","article-title":"Multi-grained contrast for data-efficient unsupervised representation learning","volume":"165","author":"Shen","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2025.112824_bib0023","series-title":"International Conference on Machine Learning","first-page":"2170","article-title":"Deepmdp: learning continuous latent space models for representation learning","author":"Gelada","year":"2019"},{"key":"10.1016\/j.patcog.2025.112824_bib0024","series-title":"9th International Conference on Learning Representations, ICLR","article-title":"Data-efficient reinforcement learning with self-predictive representations","author":"Schwarzer","year":"2021"},{"key":"10.1016\/j.patcog.2025.112824_bib0025","doi-asserted-by":"crossref","first-page":"154","DOI":"10.1016\/j.neucom.2021.11.031","article-title":"Contrastive predictive coding with transformer for video representation learning","volume":"482","author":"Liu","year":"2022","journal-title":"Neurocomputing"},{"key":"10.1016\/j.patcog.2025.112824_bib0026","series-title":"2018 IEEE Winter Conference on Applications of Computer Vision (WACV)","first-page":"1587","article-title":"Temporal difference networks for video action recognition","author":"Ng","year":"2018"},{"key":"10.1016\/j.patcog.2025.112824_bib0027","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110713","article-title":"Motion-guided spatiotemporal multitask feature discrimination for self-supervised video representation learning","volume":"155","author":"Bi","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2025.112824_bib0028","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"909","article-title":"Tea: temporal excitation and aggregation for action recognition","author":"Li","year":"2020"},{"key":"10.1016\/j.patcog.2025.112824_bib0029","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"17411","article-title":"Look back and forth: video super-resolution with explicit temporal difference modeling","author":"Isobe","year":"2022"},{"key":"10.1016\/j.patcog.2025.112824_bib0030","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"17131","article-title":"Mutual information-based temporal difference learning for human pose estimation in video","author":"Feng","year":"2023"},{"key":"10.1016\/j.patcog.2025.112824_bib0031","series-title":"Breakthroughs in Statistics: Methodology and Distribution","first-page":"492","article-title":"Robust estimation of a location parameter","author":"Huber","year":"1992"},{"key":"10.1016\/j.patcog.2025.112824_bib0032","first-page":"21271","article-title":"Bootstrap your own latent-a new approach to self-supervised learning","volume":"33","author":"Grill","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2025.112824_bib0033","unstructured":"G. Brockman, V. Cheung, L. Pettersson, J. Schneider, J. Schulman, J. Tang, W. Zaremba, Openai gym,(2016) arXiv: 1606.01540."},{"key":"10.1016\/j.patcog.2025.112824_bib0034","series-title":"International Conference on Machine Learning","first-page":"1861","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018"},{"key":"10.1016\/j.patcog.2025.112824_bib0035","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume":"34","author":"Fujimoto","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2025.112824_bib0036","series-title":"9th International Conference on Learning Representations, ICLR","article-title":"Mastering atari with discrete world models","author":"Hafner","year":"2021"},{"key":"10.1016\/j.patcog.2025.112824_bib0037","series-title":"The Eleventh International Conference on Learning Representations, ICLR","article-title":"Extreme Q-learning: MaxEnt RL without entropy","author":"Garg","year":"2023"},{"key":"10.1016\/j.patcog.2025.112824_bib0038","series-title":"International Conference on Machine Learning","first-page":"16691","article-title":"Recurrent model-free RL can be a strong baseline for many POMDPs","volume":"162","author":"Ni","year":"2022"},{"key":"10.1016\/j.patcog.2025.112824_bib0039","first-page":"741","article-title":"Stochastic latent actor-critic: deep reinforcement learning with a latent variable model","volume":"33","author":"Lee","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2025.112824_bib0040","doi-asserted-by":"crossref","first-page":"73309","DOI":"10.52202\/079017-2332","article-title":"State chrono representation for enhancing generalization in reinforcement learning","volume":"37","author":"Chen","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320325014876?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320325014876?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,9]],"date-time":"2026-03-09T17:19:45Z","timestamp":1773076785000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320325014876"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":40,"alternative-id":["S0031320325014876"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2025.112824","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"State transition difference prediction for deep reinforcement learning","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2025.112824","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"112824"}}