{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T14:30:52Z","timestamp":1776090652963,"version":"3.50.1"},"reference-count":48,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128775","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"12549-12556","source":"Crossref","is-referenced-by-count":2,"title":["AlignBot: Aligning VLM-Powered Customized Task Planning with User Reminders Through Fine-Tuning for Household Robots"],"prefix":"10.1109","author":[{"family":"Zhaxizhuoma","sequence":"first","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}]},{"given":"Pengan","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}]},{"given":"Ziniu","family":"Wu","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}]},{"given":"Jiawei","family":"Sun","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}]},{"given":"Dong","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}]},{"given":"Peng","family":"Zhou","sequence":"additional","affiliation":[{"name":"The University of Hong Kong"}]},{"given":"Nieqing","family":"Cao","sequence":"additional","affiliation":[{"name":"Xi&#x2019; an Jiaotong-Liverpool University"}]},{"given":"Yan","family":"Ding","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}]},{"given":"Bin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}]},{"given":"Xuelong","family":"Li","sequence":"additional","affiliation":[{"name":"Shanghai Artificial Intelligence Laboratory"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342169"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10133-5"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10139-z"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/tetci.2023.3297838"},{"key":"ref5","volume-title":"Palm-e: An embodied multimodal language model","author":"Driess","year":"2023"},{"key":"ref6","volume-title":"Chatgpt for robotics: Design principles and model abilities","author":"Vemprala","year":"2023"},{"key":"ref7","volume-title":"Rekep: Spatio-temporal reasoning of relational keypoint constraints for robotic manipulation","author":"Huang","year":"2024"},{"issue":"8","key":"ref8","doi-asserted-by":"crossref","first-page":"1345","DOI":"10.1007\/s10514-023-10131-7","article-title":"Text2motion: from natural language instructions to feasible plans","volume":"47","author":"Lin","year":"2023","journal-title":"Autonomous 
Robots"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"95060","DOI":"10.1109\/ACCESS.2023.3310935","article-title":"Chatgpt empowered long-step robot control in various environments: A case application","volume":"11","author":"Wake","year":"2023","journal-title":"IEEE Access"},{"key":"ref10","volume-title":"Autort: Embodied foundation models for large scale orchestration of robotic agents","author":"Ahn","year":"2024"},{"key":"ref11","article-title":"Palm-e: An embodied multimodallanguage model","author":"Driess","year":"2023","journal-title":"arXiv preprint"},{"key":"ref12","article-title":"Minigpt-4: En-hancing vision-language understanding with advanced large language models","author":"Zhu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref13","volume-title":"Proreason: Multi-modal proactive reasoning with decoupled eyesight and wisdom","author":"Zhou","year":"2024"},{"key":"ref14","volume-title":"Gpt-40: A multimodal task planning model","year":"2024"},{"key":"ref15","volume-title":"Robots that ask for help: Uncertainty alignment for large language model planners","author":"Ren","year":"2023"},{"key":"ref16","volume-title":"Reflect: Summarizing robot experiences for failure explanation and correction","author":"Liu","year":"2023"},{"key":"ref17","article-title":"LIm-personalize: Aligning 11m planners with human preferences via reinforced self-training for housekeeping robots","author":"Han","year":"2024","journal-title":"arXiv preprint"},{"key":"ref18","volume-title":"Yell at your robot: Improving on-the-fly from language corrections","author":"Shi","year":"2024"},{"key":"ref19","article-title":"Closed-loop open-vocabulary mobile manipulation with gpt-4v","author":"Zhi","year":"2024","journal-title":"arXiv preprint"},{"key":"ref20","article-title":"Look before you leap: Unveiling the power of gpt-4v in robotic vision-language planning","author":"Hu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref21","article-title":"Can feedback enhance semantic grounding in large vision-language models?","author":"Liao","year":"2024","journal-title":"arXiv preprint"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICMA61710.2024.10633088"},{"key":"ref23","volume-title":"Direct preference optimization: Your language model is secretly a reward model","author":"Rafailov","year":"2024"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1007\/s11370-024-00550-5","article-title":"A survey on integration of large language models with intelligent robots","volume-title":"Intelligent Service Robotics","author":"Kim","year":"2024"},{"key":"ref25","volume-title":"Towards human awareness in robot task planning with large language models","author":"Liu","year":"2024"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.02484"},{"key":"ref27","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"International Conference on Learning Representations","author":"Hu","year":"2022"},{"key":"ref28","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2024.findings-acl.598","volume-title":"An empirical study on parameter-efficient fine-tuning for multimodal large language models","author":"Zhou","year":"2024"},{"key":"ref29","volume-title":"Mos: Unleashing parameter efficiency of low-rank adaptation with mixture of shards","author":"Wang","year":"2025"},{"key":"ref30","volume-title":"Prolora: Partial rotation empowers more parameter-efficient 
lora","author":"Wang","year":"2024"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-8391-9_6"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611112"},{"key":"ref33","article-title":"Dkprompt: Domain knowledge prompting vision-language models for open-world planning","author":"Zhang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref34","volume-title":"Explore until confident: Efficient exploration for embodied question answering","author":"Ren","year":"2024"},{"key":"ref35","volume-title":"Progprompt: Generating situated robot task plans using large language models","author":"Singh","year":"2022"},{"key":"ref36","volume-title":"Copal: Corrective planning of robot actions with large language models","author":"Joublin","year":"2023"},{"key":"ref37","volume-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref38","volume-title":"Robotic control via embodied chain-of-thought reasoning","author":"Zawalski","year":"2024"},{"key":"ref39","article-title":"Llm+ p: Empowering large language models with optimal planning proficiency","author":"Liu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref40","article-title":"Translating natural language to planning goals with large-language models","author":"Xie","year":"2023","journal-title":"ar Xiv preprint arXiv"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.1800514"},{"key":"ref42","article-title":"Open-loop VLM robot planning: An investigation of fine-tuning and prompt engineering strategies","volume-title":"First Workshop on Vision-Language Models for Navigation and Manipulation at ICRA 2024","author":"Akiyama","year":"2024"},{"key":"ref43","article-title":"Collage prompting: Budget-friendly visual recognition with gpt-4v","author":"Xu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i3.32318"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s44267-025-00086-x"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1108\/eb026526"},{"key":"ref47","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2023.XIX.016","volume-title":"Learning fine-grained bimanual manipulation with low-cost hardware","author":"Zhao","year":"2023"},{"key":"ref48","volume-title":"Anygrasp: Robust and efficient grasp perception in spatial and temporal domains","author":"Fang","year":"2023"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128775.pdf?arnumber=11128775","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T19:49:43Z","timestamp":1775245783000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128775\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":48,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128775","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}