{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T15:51:54Z","timestamp":1776095514628,"version":"3.50.1"},"reference-count":68,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006754","name":"Army Research Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006754","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610243","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"5021-5028","source":"Crossref","is-referenced-by-count":170,"title":["ConceptGraphs: Open-Vocabulary 3D Scene Graphs for Perception and Planning"],"prefix":"10.1109","author":[{"given":"Qiao","family":"Gu","sequence":"first","affiliation":[{"name":"University of Toronto"}]},{"given":"Ali","family":"Kuwajerwala","sequence":"additional","affiliation":[{"name":"Universit&#x00E9; de Montr&#x00E9;al"}]},{"given":"Sacha","family":"Morin","sequence":"additional","affiliation":[{"name":"Universit&#x00E9; de Montr&#x00E9;al"}]},{"given":"Krishna Murthy","family":"Jatavallabhula","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"Bipasha","family":"Sen","sequence":"additional","affiliation":[{"name":"Universit&#x00E9; de Montr&#x00E9;al"}]},{"given":"Aditya","family":"Agarwal","sequence":"additional","affiliation":[{"name":"Universit&#x00E9; de Montr&#x00E9;al"}]},{"given":"Corban","family":"Rivera","sequence":"additional","affiliation":[{"name":"JHU APL"}]},{"given":"William","family":"Paul","sequence":"additional","affiliation":[{"name":"JHU APL"}]},{"given":"Kirsty","family":"Ellis","sequence":"additional","affiliation":[{"name":"Universit&#x00E9; de Montr&#x00E9;al"}]},{"given":"Rama","family":"Chellappa","sequence":"additional","affiliation":[{"name":"JHU"}]},{"given":"Chuang","family":"Gan","sequence":"additional","affiliation":[{"name":"UMass"}]},{"given":"Celso Miguel","family":"de Melo","sequence":"additional","affiliation":[{"name":"DEVCOM ARL"}]},{"given":"Joshua B.","family":"Tenenbaum","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"Antonio","family":"Torralba","sequence":"additional","affiliation":[{"name":"MIT"}]},{"given":"Florian","family":"Shkurti","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Liam","family":"Paull","sequence":"additional","affiliation":[{"name":"Universit&#x00E9; de Montr&#x00E9;al"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR.2011.6092378"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2015.XI.001"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00066"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00617"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01245"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.445"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_31"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989538"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR.2018.00024"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2018.00015"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967890"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.013"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.069"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00594"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR55827.2022.00090"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.00705"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.066"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00085"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2024.3410324"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00677"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00219"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/3DV57658.2022.00056"},{"key":"ref23","first-page":"23311","article-title":"Decomposing nerf for editing via feature field distillation","volume":"35","author":"Kobayashi","year":"2022","journal-title":"Neural Information Processing Systems"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.074"},{"key":"ref25","article-title":"Vl-fields: Towards language-grounded neural implicit spatial representations","author":"Tsagkas","year":"2023"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01807"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10160969"},{"key":"ref28","article-title":"Distilled feature fields enable few-shot manipulation","volume-title":"International Conference on Robot Learning","author":"Shen"},{"key":"ref29","article-title":"Open-set 3d scene segmentation with rendered novel views","author":"Engelmann","year":"2023"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160800"},{"key":"ref31","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International Conference on Machine Learning","author":"Radford"},{"key":"ref32","article-title":"Gpt-4 technical report","year":"2023"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref34","article-title":"Grounding dino: Marrying dino with grounded pre-training for open-set object detection","author":"Liu","year":"2023"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref36","article-title":"3d concept grounding on neural fields","author":"Hong","year":"2022","journal-title":"Neural Information Processing Systems"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00888"},{"key":"ref38","article-title":"3d-llm: Injecting the 3d world into large language models","author":"Hong","year":"2023","journal-title":"Neural Information Processing Systems"},{"key":"ref39","first-page":"894","article-title":"CLIPort: What and where pathways for robotic manipulation","volume-title":"Conference on Robot Learning","volume":"164","author":"Shridhar"},{"key":"ref40","article-title":"Language embedded radiance fields for zero-shot task-oriented grasping","volume-title":"International Conference on Robot Learning","author":"Sharma"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/icra48891.2023.10161534"},{"key":"ref42","article-title":"Clip on wheels: Zero-shot object navigation as object localization and exploration","author":"Gadre","year":"2022"},{"key":"ref43","article-title":"Lm-nav: Robotic navigation with large pre-trained models of language, vision, and action","volume-title":"International Conference on Robot Learning","author":"Shah"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/2010324.1964929"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20893-6_21"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00576"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2931042"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00402"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1177\/02783649211056674"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.050"},{"key":"ref51","article-title":"Scenegraph-fusion: Incremental 3d scene graph prediction from rgb-d sequences","volume-title":"Proceedings of Computer Vision and Pattern Recognition","author":"Wu"},{"key":"ref52","article-title":"Taskography: Evaluating robot task planning over large 3d scene graphs","volume-title":"International Conference on Robot Learning","author":"Agia"},{"key":"ref53","article-title":"Sayplan: Grounding large language models using 3d scene graphs for scalable task planning","volume-title":"International Conference on Robot Learning","author":"Rana"},{"key":"ref54","article-title":"Dinov2: Learning robust visual features without supervision","author":"Oquab","year":"2023"},{"key":"ref55","article-title":"Recognize anything: A strong image tagging model","author":"Zhang","year":"2023"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.33540\/2168"},{"key":"ref57","article-title":"The Replica dataset: A digital replica of indoor spaces","author":"Straub","year":"2019"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"ref59","article-title":"Language-driven semantic segmentation","volume-title":"International Conference on Learning Representations","author":"Li"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_31"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_40"},{"key":"ref62","article-title":"Compositional visual generation with energy based models","author":"Du","year":"2020","journal-title":"Neural Information Processing Systems"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1098\/rstb.2021.0447"},{"key":"ref64","article-title":"Ai2-thor: An interactive 3d environment for visual ai","author":"Kolve","year":"2017"},{"key":"ref65","first-page":"5982","article-title":"Procthor: Large-scale embodied ai using procedural generation","volume":"35","author":"Deitke","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref66","article-title":"Openmask3d: Open-vocabulary 3d instance segmentation","author":"Takmaz","year":"2023"},{"key":"ref67","article-title":"OVIR-3d: Open-vocabulary 3d instance retrieval without training on 3d data","volume-title":"International Conference on Robot Learning","author":"Lu"},{"key":"ref68","article-title":"Context-aware entity grounding with open-vocabulary 3d scene graphs","volume-title":"International Conference on Robot Learning","author":"Chang"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610243.pdf?arnumber=10610243","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:46:15Z","timestamp":1723268775000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610243\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":68,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610243","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}