{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T15:53:46Z","timestamp":1776182026951,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10161534","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"11509-11522","source":"Crossref","is-referenced-by-count":113,"title":["Open-vocabulary Queryable Scene Representations for Real World Planning"],"prefix":"10.1109","author":[{"given":"Boyuan","family":"Chen","sequence":"first","affiliation":[{"name":"Everyday Robots"}]},{"given":"Fei","family":"Xia","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Brian","family":"Ichter","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Kanishka","family":"Rao","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Keerthana","family":"Gopalakrishnan","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Michael S.","family":"Ryoo","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Austin","family":"Stone","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Daniel","family":"Kappler","sequence":"additional","affiliation":[{"name":"Everyday 
Robots"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR.2018.00024"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561548"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"ref34","article-title":"Pddlstream: Integrating symbolic planners and blackbox samplers via optimistic adaptive planning","year":"2020","journal-title":"Proceedings of the International Conference on Automated Planning and Scheduling"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00446"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/280765.280773"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2018.00015"},{"key":"ref36","article-title":"Socratic models: Composing zero-shot multimodal reasoning with language","author":"zeng","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref31","article-title":"Zson: Zero-shot object-goal navigation using multimodal goal embeddings","author":"majumdar","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref30","article-title":"Learning object-conditioned exploration using distributed soft actor critic","author":"wahid","year":"2020","journal-title":"CoRR"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989203"},{"key":"ref33","article-title":"Ffrob: An efficient heuristic for task and motion planning","author":"garrett","year":"0","journal-title":"Algorithmic Foundations of Robotics XI"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2873617"},{"key":"ref32","article-title":"Clip on wheels: Zero-shot object navigation as object localization and exploration","author":"gadre","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref2","article-title":"On evaluation of embodied navigation agents","author":"anderson","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref1","article-title":"Object goal navigation 
using goal-oriented semantic exploration","author":"chaplot","year":"2020","journal-title":"Neural Information Processing Systems"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01112"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00033"},{"key":"ref38","article-title":"Palm: Scaling language modeling with pathways","author":"chowdhery","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00743"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.010"},{"key":"ref24","article-title":"Scene memory transformer for embodied agents in long time horizon tasks","author":"fang","year":"2019","journal-title":"CVPR 2019"},{"key":"ref23","article-title":"Neural topological slam for visual navigation","author":"chaplot","year":"2020","journal-title":"CVPR"},{"key":"ref26","article-title":"Visual-based obstacle detection: a purposive approach using the normal flow","author":"santos-victor","year":"1995","journal-title":"Proc of the International Conference on Intelligent Autonomous Systems"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2003.1248826"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00576"},{"key":"ref22","article-title":"Learning to explore using active neural slam","author":"chaplot","year":"2020","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref21","article-title":"Visual language maps for robot navigation","author":"huang","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref28","article-title":"Learning exploration policies for navigation","author":"chen","year":"2019","journal-title":"International Conference on Learning 
Representations"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793493"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01832"},{"key":"ref8","article-title":"Lm-nav: Robotic navigation with large pre-trained models of language, vision, and action","author":"shah","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref7","article-title":"Inner monologue: Embodied reasoning through planning with language models","author":"huang","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6048293"},{"key":"ref4","article-title":"Open-vocabulary object detection via vision and language knowledge distillation","author":"gu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref3","article-title":"Learning transferable visual models from natural language supervision","author":"radford","year":"2021","journal-title":"International Conference on Machine Learning"},{"key":"ref6","article-title":"Do as i can and not as i say: Grounding language in robotic affordances","author":"ahn","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref5","article-title":"Language models as zero-shot planners: Extracting actionable knowledge for embodied agents","author":"huang","year":"2022","journal-title":"ArXiv Preprint"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation 
(ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10161534.pdf?arnumber=10161534","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:31:21Z","timestamp":1690219881000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10161534\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10161534","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}