{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T05:42:44Z","timestamp":1767678164633,"version":"3.48.0"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"Wien Bibliothek through its Open Access Funding Programme"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/lra.2025.3648611","type":"journal-article","created":{"date-parts":[[2025,12,25]],"date-time":"2025-12-25T18:28:07Z","timestamp":1766687287000},"page":"2162-2169","source":"Crossref","is-referenced-by-count":0,"title":["<i>DoublyAware:<\/i>\n                    Dual Planning and Policy Awareness for Temporal Difference Learning in Humanoid Locomotion"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3471-5533","authenticated-orcid":false,"given":"Khang","family":"Nguyen","sequence":"first","affiliation":[{"name":"Department of Robotics, Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, UAE"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0929-3316","authenticated-orcid":false,"given":"An T.","family":"Le","sequence":"additional","affiliation":[{"name":"VinUniversity, Hanoi, Vietnam"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5266-8091","authenticated-orcid":false,"given":"Jan","family":"Peters","sequence":"additional","affiliation":[{"name":"Intelligent Autonomous Systems Lab, Hessian.AI, Technische Universit&#x00E4;t Darmstadt, Darmstadt, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0692-8830","authenticated-orcid":false,"given":"Minh Nhat","family":"Vu","sequence":"additional","affiliation":[{"name":"Automation &amp; Control Institute (ACIN), Technische Universit&#x00E4;t Wien, Vienna, Austria"}]}],"member":"263","reference":[{"article-title":"Learning humanoid locomotion with transformers","year":"2023","author":"Radosavovic","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adi9579"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-04924-3_5"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24853-0"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-44584-3_35"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-021-05946-3"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/iros60139.2025.11245653"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/iros60139.2025.11247640"},{"key":"ref9","first-page":"8387","article-title":"Temporal difference learning for model predictive control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hansen","year":"2022"},{"key":"ref10","article-title":"TD-MPC2: Scalable, robust world models for continuous control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hansen","year":"2023"},{"key":"ref11","first-page":"4759","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Chua","year":"2018"},{"article-title":"Model-based offline planning","year":"2020","author":"Argenson","key":"ref12"},{"article-title":"Improving TD-MPC through policy constraint","year":"2025","author":"Lin","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/b106715"},{"article-title":"Pushing the limits of mathematical reasoning in open language models","year":"2024","author":"Shao","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.061"},{"key":"ref17","first-page":"1","article-title":"Reinforcement learning for humanoid robotics","volume-title":"Proc. IEEE-RAS Int. Conf. Humanoid Robots","author":"Peters","year":"2003"},{"key":"ref18","first-page":"1622","article-title":"Learning off-policy with online planning","volume-title":"Proc. Conf. Robot Learn.","author":"Sikchi","year":"2022"},{"article-title":"HumanPlus: Humanoid shadowing and imitation from humans","year":"2024","author":"Fu","key":"ref19"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11128549"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN60168.2024.10731316"},{"article-title":"Learning motion skills with adaptive assistive curriculum force in humanoid robots","year":"2025","author":"Cao","key":"ref22"},{"key":"ref23","first-page":"234","article-title":"Conformal prediction in manifold learning","volume-title":"Proc. 7th Workshop Conformal Probabilistic Prediction Appl.","author":"Kuleshov","year":"2018"},{"article-title":"Conformal prediction with learned features","year":"2024","author":"Kiyani","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CDC49753.2023.10384075"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3468092"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610223"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.008"},{"key":"ref29","first-page":"11784","article-title":"Stabilizing off-policy Q-learning via bootstrapping error reduction","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Kumar","year":"2019"},{"key":"ref30","first-page":"1179","article-title":"Conservative Q-learning for offline reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Kumar","year":"2020"},{"key":"ref31","first-page":"20132","article-title":"A minimalist approach to offline reinforcement learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Fujimoto","year":"2021"},{"key":"ref32","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fujimoto","year":"2019"},{"article-title":"Advantage-weighted regression: Simple and scalable off-policy reinforcement learning","year":"2019","author":"Peng","key":"ref33"},{"article-title":"Extreme Q-learning: Maxent RL without entropy","year":"2023","author":"Garg","key":"ref34"},{"article-title":"Offline reinforcement learning with implicit Q-learning","year":"2021","author":"Kostrikov","key":"ref35"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487277"},{"article-title":"TD-GRPC: Temporal difference learning with group relative policy constraint for humanoid locomotion","year":"2025","author":"Nguyen","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1111\/j.1751-5823.2001.tb00457.x"},{"key":"ref39","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman","year":"2015"},{"key":"ref40","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Haarnoja","year":"2018"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342038"},{"article-title":"AWAC: Accelerating online RL with offline datasets","year":"2020","author":"Nair","key":"ref42"},{"article-title":"Diffusion models are evolutionary algorithms","year":"2024","author":"Zhang","key":"ref43"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611021"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11293803\/11315132.pdf?arnumber=11315132","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T05:39:28Z","timestamp":1767677968000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11315132\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":44,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/lra.2025.3648611","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"type":"electronic","value":"2377-3766"},{"type":"electronic","value":"2377-3774"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}