{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T20:57:11Z","timestamp":1774645031100,"version":"3.50.1"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","award":["N00014-25-1-2322"],"award-info":[{"award-number":["N00014-25-1-2322"]}],"id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1109\/lra.2026.3673995","type":"journal-article","created":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T19:55:25Z","timestamp":1773431725000},"page":"5534-5541","source":"Crossref","is-referenced-by-count":0,"title":["Inference-Time Enhancement of Generative Robot Policies via Predictive World Modeling"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-5788-2603","authenticated-orcid":false,"given":"Han","family":"Qi","sequence":"first","affiliation":[{"name":"School of Engineering, Applied Sciences, Harvard University, Cambridge, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7131-9224","authenticated-orcid":false,"given":"Haocheng","family":"Yin","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0644-1412","authenticated-orcid":false,"given":"Aris","family":"Zhu","sequence":"additional","affiliation":[{"name":"School of Engineering, Applied Sciences, Harvard University, Cambridge, MA, USA"}]},{"given":"Yilun","family":"Du","sequence":"additional","affiliation":[{"name":"School of Engineering, Applied Sciences, Harvard University, Cambridge, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0074-7836","authenticated-orcid":false,"given":"Heng","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Engineering, Applied Sciences, Harvard University, Cambridge, MA, USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2025.3631816"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/02783649241281508"},{"key":"ref4","first-page":"2679","article-title":"OpenVLA: An open-source vision-language-action model","volume-title":"Proc. 18th Conf. Robot. Learn.","author":"Kim","year":"2024"},{"key":"ref5","first-page":"3705","article-title":"Evaluating real-world robot manipulation policies in simulation","volume-title":"Proc. 8th Conf. Robot. Learn.","author":"Li","year":"2024"},{"key":"ref6","first-page":"8633","article-title":"Video diffusion models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ho","year":"2022"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1873"},{"key":"ref8","first-page":"1726","article-title":"LaDi-WM: A latent diffusion-based world model for predictive manipulation","volume-title":"Proc. 9th Conf. Robot. Learn.","author":"Huang","year":"2025"},{"key":"ref9","first-page":"647","article-title":"Mastering diverse domains through world models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hafner","year":"2024"},{"key":"ref10","first-page":"1","article-title":"Control-oriented clustering of visual latent representation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Qi","year":"2025"},{"key":"ref11","article-title":"From imitation to refinementresidual RL for precise assembly","volume-title":"Proc. Conf. Robot Learn. Workshop Learn. Diverse Demonstrations","author":"Ankile","year":"2024"},{"key":"ref12","first-page":"26991","article-title":"Behavior generation with latent actions","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee","year":"2024"},{"key":"ref13","article-title":"Compose by focus: Scene graph-based atomic skills","volume-title":"Proc. IEEE Int. Conf. Robot. Autom.","author":"Qi","year":"2026"},{"key":"ref14","first-page":"51936","article-title":"Robogen: Towards unleashing infinite data for automated robot learning via generative simulation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang","year":"2024"},{"key":"ref15","first-page":"1","article-title":"GenSim: Generating robotic simulation tasks via large language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wang","year":"2023"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.010"},{"key":"ref17","first-page":"1","article-title":"Structdiffusion: Object-centric diffusion for semantic rearrangement of novel objects","volume-title":"Proc. Workshop Lang. Robot.","author":"Liu","year":"2022"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461184"},{"key":"ref19","first-page":"1","article-title":"Learning interactive real-world simulators","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yang","year":"2024"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.01453"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342382"},{"key":"ref22","first-page":"33486","article-title":"Potential based diffusion motion planning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Luo","year":"2024"},{"key":"ref23","article-title":"Action-conditional video prediction using deep networks in atari games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Oh","year":"2015"},{"key":"ref24","article-title":"World models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ha","year":"2018"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3045007"},{"key":"ref26","first-page":"9156","article-title":"Learning universal policies via text-guided video generation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Du","year":"2024"},{"key":"ref27","first-page":"3943","article-title":"Dreamitate: Real-world visuomotor policy learning via video generation","volume-title":"Proc. 8th Conf. Robot. Learn.","author":"Liang","year":"2024"},{"key":"ref28","first-page":"1","article-title":"Learning to act from actionless videos through dense correspondences","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ko","year":"2024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73116-7_18"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01472"},{"key":"ref31","first-page":"1","article-title":"Video language planning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Du","year":"2024"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11128780"},{"key":"ref33","article-title":"Transformers are sample-efficient world models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Micheli","year":"2023"},{"key":"ref34","article-title":"TD-MPC2: Scalable, robust world models for continuous control","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hansen","year":"2024"},{"key":"ref35","article-title":"Dino-WM: World models on pre-trained visual features enable zero-shot planning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhou","year":"2024"},{"key":"ref36","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ho","year":"2020"},{"key":"ref37","volume-title":"System Identification: Theory for the User","author":"Lennart","year":"1999"},{"key":"ref38","first-page":"1","article-title":"Numerical optimal control","volume-title":"Proc. Optim. Eng. Center","author":"Diehl","year":"2011"},{"key":"ref39","article-title":"Learning multiple initial solutions to optimization problems","author":"Sharony","year":"2024"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5979561"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1873"},{"key":"ref43","article-title":"ChatGPT-4o","year":"2024"},{"key":"ref44","article-title":"Steering your generalists: Improving robotic foundation models via value guidance","volume-title":"Proc. Conf. Robot. Learn.","author":"Nakamoto","year":"2024"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11435997\/11433756.pdf?arnumber=11433756","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T19:52:45Z","timestamp":1774641165000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11433756\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":44,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/lra.2026.3673995","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5]]}}}