{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T14:56:46Z","timestamp":1773413806172,"version":"3.50.1"},"reference-count":44,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Robotics Programme","award":["M23NBK0053"],"award-info":[{"award-number":["M23NBK0053"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/lra.2025.3595034","type":"journal-article","created":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T18:16:19Z","timestamp":1754072179000},"page":"9726-9733","source":"Crossref","is-referenced-by-count":1,"title":["DISCO: Language-Guided Manipulation With Diffusion Policies and Constrained Inpainting"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7653-9713","authenticated-orcid":false,"given":"Ce","family":"Hao","sequence":"first","affiliation":[{"name":"School of Computing, National University of Singapore, Singapore"}]},{"given":"Kelvin","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore, Singapore"}]},{"given":"Zhiwei","family":"Xue","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6696-5513","authenticated-orcid":false,"given":"Siyuan","family":"Luo","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3278-0035","authenticated-orcid":false,"given":"Harold","family":"Soh","sequence":"additional","affiliation":[{"name":"School of Computing, National University of Singapore, Singapore"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.067"},{"key":"ref3","first-page":"9902","article-title":"Planning with diffusion for flexible behavior synthesis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Janner"},{"key":"ref4","article-title":"CHD: Coupled hierarchical diffusion for long-horizon tasks","author":"Hao","year":"2025"},{"key":"ref5","article-title":"Language-conditioned learning for robotic manipulation: A survey","author":"Zhou","year":"2023"},{"key":"ref6","first-page":"13139","article-title":"Language-conditioned imitation learning for robot manipulation tasks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Stepputtis","year":"2020"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01710"},{"key":"ref8","first-page":"3766","article-title":"Scaling up and distilling down: Language-guided robot skill acquisition","volume-title":"Proc. Conf. Robot Learn.","author":"Ha","year":"2023"},{"key":"ref9","article-title":"Moka: Open-vocabulary robotic manipulation through mark-based visual prompting","volume-title":"Proc. 1st Workshop Vis.-Lang. Models Navigation Manipulation ICRA 2024","author":"Liu"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.025"},{"key":"ref11","first-page":"785","article-title":"Perceiver-actor: A multi-task transformer for robotic manipulation","volume-title":"Proc. Conf. Robot Learn.","author":"Shridhar","year":"2023"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10131-7"},{"key":"ref13","article-title":"ManiPose: A comprehensive benchmark for pose-aware object manipulation in robotics","author":"Yu","year":"2024"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"ref15","article-title":"A theoretical justification for image inpainting using denoising diffusion probabilistic models","author":"Rout","year":"2023"},{"key":"ref16","first-page":"3242","article-title":"Compositional diffusion-based continuous constraint solvers","volume-title":"Proc. Conf. Robot Learn.","author":"Yang"},{"key":"ref17","first-page":"906","article-title":"Multiple interactions made easy (mime): Large scale demonstrations data for imitation","volume-title":"Proc. Conf. Robot Learn.","author":"Sharma","year":"2018"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161569"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.122"},{"key":"ref20","article-title":"RT-trajectory: Robotic task generalization via hindsight trajectory sketches","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Gu"},{"key":"ref21","first-page":"8469","article-title":"PaLM-E: An embodied multimodal language model","volume-title":"Proc. 40th Int. Conf. Mach. Learn.","author":"Driess"},{"key":"ref22","first-page":"540","article-title":"VoxPoser: Composable 3D value maps for robotic manipulation with language models","volume-title":"Proc. Conf. Robot Learn.","author":"Huang"},{"key":"ref23","first-page":"2165","article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","volume-title":"Proc. Conf. Robot Learn.","author":"Zitkovich"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref25","article-title":"Classifier-free diffusion guidance","volume-title":"Proc. NeurIPS 2021 Workshop Deep Generative Models Downstream Appl","author":"Ho","year":"2021"},{"key":"ref26","article-title":"Language control diffusion: Efficiently scaling through space, time, and tasks","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Zhang","year":"2022"},{"key":"ref27","first-page":"287","article-title":"Do as I can, not as I say: Grounding language in robotic affordances","volume-title":"Proc. Conf. Robot Learn.","author":"Ichter"},{"key":"ref28","article-title":"Closed-loop open-vocabulary mobile manipulation with GPT-4V","author":"Zhi","year":"2024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-019-10163-0"},{"key":"ref30","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ho","year":"2020"},{"key":"ref31","article-title":"Planning as in-painting: A diffusion-based embodied task planning framework for environments under uncertainty","author":"Yang","year":"2023"},{"key":"ref32","first-page":"1949","article-title":"3D Diffuser actor: Policy diffusion with 3D scene representations","volume-title":"Proc. Conf. Robot Learn.","author":"Ke"},{"key":"ref33","article-title":"Image inpainting via tractable steering of diffusion models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Liu"},{"key":"ref34","first-page":"894","article-title":"CLIPort: What and where pathways for robotic manipulation","volume-title":"Proc. Conf. Robot Learn.","author":"Shridhar","year":"2022"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01710"},{"key":"ref36","first-page":"158","article-title":"Implicit behavioral cloning","volume-title":"Proc. Conf. Robot Learn.","author":"Florence","year":"2022"},{"key":"ref37","first-page":"22955","article-title":"Behavior transformers: Cloning $ k$ modes with one stone","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Shafiullah","year":"2022"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161569"},{"key":"ref39","article-title":"Vision-language foundation models as effective robot imitators","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Li","year":"2023"},{"key":"ref40","article-title":"Zero-shot robotic manipulation with pre-trained image-editing diffusion models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Black","year":"2023"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3180108"},{"key":"ref42","article-title":"OpenVLA: An open-source vision-language-action model","volume-title":"Proc. 8th Annu. Conf. Robot Learn","author":"Kim","year":"2024"},{"key":"ref43","article-title":"0: A vision-language-action flow model for general robot control","author":"Black","year":"2024"},{"key":"ref44","first-page":"56619","article-title":"Deer-VLA: Dynamic inference of multimodal large language models for efficient robot execution","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yue","year":"2024"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11125679\/11106699.pdf?arnumber=11106699","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T04:46:03Z","timestamp":1755578763000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11106699\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":44,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/lra.2025.3595034","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}