{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,23]],"date-time":"2026-06-23T11:06:47Z","timestamp":1782212807610,"version":"3.54.5"},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"German Research Foundation DFG"},{"name":"Scholarship From the Ministry of Higher Education of the Arab Republic of Egypt"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1109\/lra.2024.3504341","type":"journal-article","created":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T19:22:02Z","timestamp":1732216922000},"page":"112-119","source":"Crossref","is-referenced-by-count":6,"title":["QT-TDM: Planning With Transformer Dynamics Model and Autoregressive Q-Learning"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4198-3889","authenticated-orcid":false,"given":"Mostafa","family":"Kotb","sequence":"first","affiliation":[{"name":"Knowledge Technology Group, Department of Informatics, Universit&#x00E4;t Hamburg, Hamburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5163-938X","authenticated-orcid":false,"given":"Cornelius","family":"Weber","sequence":"additional","affiliation":[{"name":"Knowledge Technology Group, Department of Informatics, Universit&#x00E4;t Hamburg, Hamburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1670-8962","authenticated-orcid":false,"given":"Muhammad Burhan","family":"Hafez","sequence":"additional","affiliation":[{"name":"School of Electronics and Computer Science, University of Southampton, Southampton, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1343-4775","authenticated-orcid":false,"given":"Stefan","family":"Wermter","sequence":"additional","affiliation":[{"name":"Knowledge Technology Group, Department of Informatics, Universit&#x00E4;t Hamburg, Hamburg, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"4759","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","volume-title":"Proc. Neural Inf. Process. Syst","author":"Chua","year":"2018"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561842"},{"key":"ref3","first-page":"1332","article-title":"Masked world models for visual control","volume-title":"Proc. Conf. Robot Learn.","author":"Seo","year":"2023"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342018"},{"key":"ref5","first-page":"8583","article-title":"Planning to explore via self-supervised world models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sekar","year":"2020"},{"key":"ref6","article-title":"Mastering diverse domains through world models","author":"Hafner","year":"2023"},{"key":"ref7","first-page":"5757","article-title":"Context-aware dynamics model for generalization in model-based reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee","year":"2020"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"},{"key":"ref10","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dosovitskiy","year":"2021"},{"key":"ref11","first-page":"15084","article-title":"Decision transformer: Reinforcement learning via sequence modeling","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Chen","year":"2021"},{"key":"ref12","article-title":"A generalist dynamics model for control","author":"Schubert","year":"2023"},{"key":"ref13","article-title":"Transformers are sample-efficient world models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Micheli","year":"2023"},{"key":"ref14","article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hafner","year":"2020"},{"key":"ref15","article-title":"Transformer-based world models are happy with 100k interactions","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Robine","year":"2023"},{"key":"ref16","article-title":"TransDreamer: Reinforcement learning with transformer world models","author":"Chen","year":"2022"},{"key":"ref17","first-page":"1273","article-title":"Offline reinforcement learning as one big sequence modeling problem","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Janner","year":"2021"},{"key":"ref18","first-page":"3909","article-title":"Q-transformer: Scalable offline reinforcement learning via autoregressive Q-functions","volume-title":"Proc. Conf. Robot Learn.","author":"Chebotar","year":"2023"},{"key":"ref19","first-page":"8387","article-title":"Temporal difference learning for model predictive control","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hansen","year":"2022"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpa.2020.100022"},{"key":"ref21","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Proc. Conf. Robot Learn.","author":"Yu","year":"2020"},{"key":"ref22","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Hafner","year":"2019"},{"key":"ref23","first-page":"6306","article-title":"Neural discrete representation learning","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Den","year":"2017"},{"key":"ref24","article-title":"Auto-encoding variational bayes","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma","year":"2014"},{"key":"ref25","article-title":"A generalist agent","author":"Reed","year":"2022","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref26","article-title":"Dinov2: Learning robust visual features without supervision","author":"Oquab"},{"key":"ref27","first-page":"2165","article-title":"RT-2: Vision-language-action models transfer web knowledge to robotic control","volume-title":"Proc. Conf. Robot Learn.","author":"Zitkovich","year":"2023"},{"key":"ref28","first-page":"8469","article-title":"PaLM-E: An Embodied multimodal Language Model","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Driess","year":"2023"},{"issue":"1","key":"ref29","first-page":"1","article-title":"A path towards autonomous machine intelligence","volume":"62","author":"LeCun","year":"2022","journal-title":"Open Rev."},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/BF00115009"},{"key":"ref32","first-page":"2254","article-title":"Monte Carlo augmented actor-critic for sparse reward deep reinforcement learning from suboptimal demonstrations","volume-title":"Proc. Neural Inf. Process. Syst.","author":"Wilcox","year":"2022"},{"key":"ref33","article-title":"TD-MPC2: Scalable, robust world models for continuous control","author":"Hansen","year":"2023"},{"key":"ref34","article-title":"minGPT: A minimal PyTorch re-implementation of the OpenAI GPT (generative pretrained transformer) training","author":"Karpathy","year":"2020"},{"key":"ref35","article-title":"Stop regressing: Training value functions via classification for scalable deep RL","author":"Farebrother","year":"2024"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/10768868\/10759753.pdf?arnumber=10759753","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T07:32:48Z","timestamp":1733297568000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10759753\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1]]},"references-count":35,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/lra.2024.3504341","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1]]}}}