{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T04:51:11Z","timestamp":1761367871833,"version":"build-2065373602"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T00:00:00Z","timestamp":1759190400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T00:00:00Z","timestamp":1759190400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,30]]},"DOI":"10.1109\/humanoids65713.2025.11203117","type":"proceedings-article","created":{"date-parts":[[2025,10,24]],"date-time":"2025-10-24T17:15:50Z","timestamp":1761326150000},"page":"389-396","source":"Crossref","is-referenced-by-count":0,"title":["Intention: Inferring Tendencies of Humanoid Robot Motion Through Interactive Intuition and Grounded VLM"],"prefix":"10.1109","author":[{"given":"Jin","family":"Wang","sequence":"first","affiliation":[{"name":"Humanoids and Human-Centered Mechatronics (HHCM), Istituto Italiano di Tecnologia,Genoa,Italy"}]},{"given":"Weijie","family":"Wang","sequence":"additional","affiliation":[{"name":"Humanoids and Human-Centered Mechatronics (HHCM), Istituto Italiano di Tecnologia,Genoa,Italy"}]},{"given":"Boyuan","family":"Deng","sequence":"additional","affiliation":[{"name":"Humanoids and Human-Centered Mechatronics (HHCM), Istituto Italiano di Tecnologia,Genoa,Italy"}]},{"given":"Heng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Istituto Italiano di Tecnologia,Human-Robot Interfaces and Interaction Lab,Genoa,Italy"}]},{"given":"Rui","family":"Dai","sequence":"additional","affiliation":[{"name":"Humanoids and Human-Centered Mechatronics (HHCM), Istituto Italiano di Tecnologia,Genoa,Italy"}]},{"given":"Nikos","family":"Tsagarakis","sequence":"additional","affiliation":[{"name":"Humanoids and Human-Centered Mechatronics (HHCM), Istituto Italiano di Tecnologia,Genoa,Italy"}]}],"member":"263","reference":[{"volume-title":"Learning physical intuition for robotic manipulation","year":"2021","author":"Groth","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3068908"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3324580"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2023.2168134"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1177\/02783649241312698"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3396368"},{"key":"ref7","article-title":"Bresa: Bio-inspired reflexive safe reinforcement learning for contact-rich robotic tasks","author":"Zhang","year":"2025","journal-title":"arXiv preprint"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CCAC58200.2023.10333476"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s43154-022-00082-9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2024.2408593"},{"volume-title":"Robonursevla: Robotic scrub nurse system based on vision-language-action model","year":"2024","author":"Li","key":"ref11"},{"volume-title":"Lohoravens: A long-horizon language-conditioned benchmark for robotic tabletop manipulation","year":"2023","author":"Zhang","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"ref14","article-title":"Language to rewards for robotic skill synthesis","volume-title":"7th Annual Conference on Robot Learning (CoRL)","author":"Yu","year":"2023"},{"key":"ref15","article-title":"Saytap: Language to quadrupedal locomotion","volume-title":"7th Annual Conference on Robot Learning","author":"Tang","year":"2023"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610455"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.049"},{"key":"ref18","article-title":"Robots that ask for help: Uncertainty alignment for llm planners","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Ren","year":"2023"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/cdc56724.2024.10885862"},{"key":"ref20","article-title":"Multi-resolution sensing for real-time control with vision-language models","volume-title":"7th Annual Conference on Robot Learning","author":"Saxena","year":"2023"},{"key":"ref21","article-title":"Gesture-informed robot assistance via foundation models","volume-title":"7th Annual Conference on Robot Learning","author":"Lin","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802344"},{"key":"ref23","article-title":"Bootstrap your own skills: Learning to solve new tasks with large language model guidance","volume-title":"7th Annual Conference on Robot Learning","author":"Zhang","year":"2023"},{"key":"ref24","article-title":"Do as i can, not as i say: Grounding language in robotic affordances","volume-title":"6th Annual Conference on Robot Learning (CoRL)","author":"Ichter","year":"2022"},{"key":"ref25","article-title":"Grounding language models in autonomous loco-manipulation tasks","author":"Wang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref26","article-title":"Distilled feature fields enable few-shot language-guided manipulation","volume-title":"7th Annual Conference on Robot Learning","author":"Shen","year":"2023"},{"key":"ref27","article-title":"Voxposer: Composable 3d value maps for robotic manipulation with language models","author":"Huang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref28","article-title":"Language embedded radiance fields for zero-shot task-oriented grasping","volume-title":"7th Annual Conference on Robot Learning","author":"Rashid","year":"2023"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01370"},{"key":"ref30","article-title":"Open x-embodiment: Robotic learning datasets and rt-x models","author":"Padalkar","year":"2023","journal-title":"arXiv preprint"},{"journal-title":"Gemini robotics: Bringing ai into the physical world","year":"2025","author":"Team","key":"ref31"},{"journal-title":"Openvla: An open-source vision-language-action model","year":"2024","author":"Kim","key":"ref32"},{"journal-title":"Citekey: gptvision","article-title":"Gpt-4v (ision) system card","year":"2023","key":"ref33"},{"key":"ref34","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford","year":"2021"},{"key":"ref35","article-title":"HYPERmotion: Learning hybrid behavior planning for autonomous loco-manipulation","volume-title":"8th Annual Conference on Robot Learning","author":"Wang","year":"2024"}],"event":{"name":"2025 IEEE-RAS 24th International Conference on Humanoid Robots (Humanoids)","start":{"date-parts":[[2025,9,30]]},"location":"Seoul, Korea, Republic of","end":{"date-parts":[[2025,10,2]]}},"container-title":["2025 IEEE-RAS 24th International Conference on Humanoid Robots (Humanoids)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11202977\/11203009\/11203117.pdf?arnumber=11203117","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T04:46:38Z","timestamp":1761367598000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11203117\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,30]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/humanoids65713.2025.11203117","relation":{},"subject":[],"published":{"date-parts":[[2025,9,30]]}}}