{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:40:37Z","timestamp":1766061637550,"version":"3.48.0"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iros60139.2025.11246507","type":"proceedings-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:54:45Z","timestamp":1764269685000},"page":"17688-17695","source":"Crossref","is-referenced-by-count":0,"title":["Keypoint-Aware RAG for Robotic Manipulation: In-Context Constraint Learning via Large-Scale Retrieval"],"prefix":"10.1109","author":[{"given":"Jiuzhou","family":"Lin","sequence":"first","affiliation":[{"name":"Tsinghua University,Tsinghua Shenzhen International Graduate School,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Yang","sequence":"additional","affiliation":[{"name":"Tsinghua University,Tsinghua Shenzhen International Graduate School,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yizhe","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University,Tsinghua Shenzhen International Graduate School,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kangkang","family":"Dong","sequence":"additional","affiliation":[{"name":"Jianghuai Advanced Technology Center,Hefei,China,230000"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Houde","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University,Tsinghua Shenzhen International Graduate School,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"article-title":"Gpt-4o system card","year":"2024","author":"Hurst","key":"ref2"},{"article-title":"Gpt-4 technical report","year":"2023","author":"Achiam","key":"ref3"},{"article-title":"Look before you leap: Unveiling the power of gpt-4v in robotic vision-language planning","year":"2023","author":"Hu","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801352"},{"article-title":"Moka: Open-vocabulary robotic manipulation through mark-based visual prompting","volume-title":"First Workshop on Vision-Language Models for Navigation and Manipulation at ICRA 2024","author":"Liu","key":"ref7"},{"article-title":"Rekep: Spatio-temporal reasoning of relational keypoint constraints for robotic manipulation","year":"2024","author":"Huang","key":"ref8"},{"key":"ref9","first-page":"2165","article-title":"Rt-2: Vision-language-action models transfer web knowledge to robotic control","volume-title":"Conference on Robot Learning","author":"Zitkovich"},{"article-title":"Open-vla: An open-source vision-language-action model","year":"2024","author":"Kim","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.025"},{"article-title":"Open x-embodiment: Robotic learning datasets and rt-x models","year":"2023","author":"O\u2019Neill","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2024.xx.096"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11128807"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.016"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1177\/02783649241273668"},{"key":"ref17","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-95459-8_9"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/iros60139.2025.11246854"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2025.XXI.161"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"article-title":"From local to global: A graph rag approach to query-focused summarization","year":"2024","author":"Edge","key":"ref22"},{"article-title":"Self-rag: Learning to retrieve, generate, and critique through self-reflection","volume-title":"The Twelfth International Conference on Learning Representations","author":"Asai","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801542"},{"article-title":"Embodied-rag: General non-parametric embodied memory for retrieval and generation","year":"2025","author":"Xie","key":"ref25"},{"issue":"140","key":"ref26","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"Journal of machine learning research"},{"article-title":"Large language models as general pattern machines","volume-title":"Proceedings of the 7th Conference on Robot Learning (CoRL)","author":"Mirchandani","key":"ref27"},{"article-title":"Incoro: In-context learning for robotics control with feedback loops","year":"2024","author":"Zhu","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/icra55743.2025.11128681"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11127585"},{"article-title":"Tracevla: Visual trace prompting enhances spatial-temporal awareness for generalist robotic policies","year":"2024","author":"Zheng","key":"ref31"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.00649"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801542"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73033-7_2"},{"article-title":"Grounded sam: Assembling open-world models for diverse visual tasks","year":"2024","author":"Ren","key":"ref35"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3140817"},{"article-title":"Set-of-mark prompting unleashes extraordinary visual grounding in gpt-4v","year":"2023","author":"Yang","key":"ref37"},{"key":"ref38","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2024.XX.055","article-title":"Rvt2: Learning precise manipulation from few demonstrations","author":"Goyal","year":"2024","journal-title":"RSS"},{"article-title":"Act3d: 3d feature field transformers for multi-task robotic manipulation","year":"2023","author":"Gervet","key":"ref39"},{"article-title":"Voxposer: Composable 3d value maps for robotic manipulation with language models","year":"2023","author":"Huang","key":"ref40"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74048-3_4"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.3233\/IDA-2007-11508"},{"article-title":"Viola: Imitation learning for vision-based manipulation with object proposal priors","volume-title":"6th Annual Conference on Robot Learning (CoRL)","author":"Zhu","key":"ref43"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3146589"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01618"}],"event":{"name":"2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2025,10,19]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11245651\/11245652\/11246507.pdf?arnumber=11246507","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T12:36:37Z","timestamp":1766061397000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11246507\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/iros60139.2025.11246507","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}