{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T21:19:05Z","timestamp":1776719945079,"version":"3.51.2"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"JSPS KAKENHI","award":["23K28168"],"award-info":[{"award-number":["23K28168"]}]},{"name":"JST Moonshot"},{"name":"JSPS Fellows","award":["JP23KJ1917"],"award-info":[{"award-number":["JP23KJ1917"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1109\/lra.2026.3682441","type":"journal-article","created":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T19:48:20Z","timestamp":1775764100000},"page":"6767-6774","source":"Crossref","is-referenced-by-count":0,"title":["LILAC: Language-Conditioned Object-Centric Optical Flow for Open-Loop Trajectory Generation"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1991-9119","authenticated-orcid":false,"given":"Motonari","family":"Kambara","sequence":"first","affiliation":[{"name":"Keio University, Yokohama, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6971-8512","authenticated-orcid":false,"given":"Koki","family":"Seno","sequence":"additional","affiliation":[{"name":"Keio University, Yokohama, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6765-9238","authenticated-orcid":false,"given":"Tomoya","family":"Kaichi","sequence":"additional","affiliation":[{"name":"KDDI Research Inc., Saitama, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6562-0487","authenticated-orcid":false,"given":"Yanan","family":"Wang","sequence":"additional","affiliation":[{"name":"KDDI Research Inc., Saitama, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0261-0510","authenticated-orcid":false,"given":"Komei","family":"Sugiura","sequence":"additional","affiliation":[{"name":"Keio University, Yokohama, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.120"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2022.xviii.063"},{"key":"ref3","first-page":"1723","article-title":"BridgeData V2: A dataset for robot learning at scale","volume-title":"Proc. Conf. Robot Learn.","author":"Walke","year":"2023"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.025"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611477"},{"key":"ref6","article-title":"OpenVLA: An open-source vision-language-action model","volume-title":"Proc. Conf. Robot Learn.","author":"Kim","year":"2024"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2025.xxi.010"},{"key":"ref8","article-title":"Octo: An open-source generalist robot policy","volume-title":"Proc. Robot.: Sci. Syst.","author":"Team","year":"2024"},{"key":"ref9","article-title":"CogACT: A foundational vision-language-action model for synergizing cognition and action in robotic manipulation","author":"Li","year":"2024"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2025.3544909"},{"key":"ref11","article-title":"LLaRA: Supercharging robot learning data for vision-language policy","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Li","year":"2025"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2025.2469689"},{"key":"ref13","first-page":"3242","article-title":"Task success prediction for open-vocabulary manipulation based on multi-level aligned representations","volume-title":"Proc. Conf. Robot Learn.","author":"Goko","year":"2024"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73116-7_18"},{"key":"ref15","article-title":"FLIP: Flow-centric generative planning for general-purpose manipulation Tasks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Gao","year":"2025"},{"key":"ref16","first-page":"2475","article-title":"Flow as the cross-domain manipulation interface","volume-title":"Proc. Conf. Robot Learn.","author":"Xu","year":"2024"},{"key":"ref17","article-title":"Evo-1: Lightweight vision-language-action model with preserved semantic alignment","author":"Lin","year":"2025"},{"key":"ref18","first-page":"17","article-title":"$\\pi _{0.5}$: A vision-language-action model with open-world generalization","volume-title":"Proc. Conf. Robot Learn.","author":"Intelligence","year":"2025"},{"key":"ref19","first-page":"8469","article-title":"PaLM-E: An embodied multimodal language model","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Driess","year":"2023"},{"key":"ref20","first-page":"540","article-title":"VoxPoser: Composable 3D value maps for robotic manipulation with language models","volume-title":"Proc. Conf. Robot Learn.","author":"Huang","year":"2023"},{"key":"ref21","article-title":"VLA-0: Building state-of-the-art VLAs with zero modification","author":"Goyal","year":"2025"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v40i22.38931"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.092"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"ref25","first-page":"2630","article-title":"HowTo100 M: Learning a text-video embedding by watching hundred million narrated video clips","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis.","author":"Miech","year":"2019"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2991965"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.622"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2025.3527290"},{"key":"ref29","first-page":"547","article-title":"RAM: Retrieval-based affordance transfer for generalizable zero-shot robotic manipulation","volume-title":"Proc. Conf. Robot Learn.","author":"Kuang","year":"2024"},{"key":"ref30","article-title":"Qwen2.5-VL technical report","author":"Bai","year":"2025"},{"key":"ref31","article-title":"CoTracker3: Simpler and better point tracking by pseudo-labelling real videos","author":"Karaev","year":"2024"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1186\/s40648-019-0132-3"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2015.08.002"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7083369\/11481819\/11478289.pdf?arnumber=11478289","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T20:06:20Z","timestamp":1776715580000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11478289\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":34,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/lra.2026.3682441","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,6]]}}}