{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T08:07:51Z","timestamp":1779437271527,"version":"3.53.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T00:00:00Z","timestamp":1769731200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T00:00:00Z","timestamp":1769731200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004937","name":"Bundesministerium f\u00fcr Forschung und Technologie","doi-asserted-by":"publisher","award":["16IW2400"],"award-info":[{"award-number":["16IW2400"]}],"id":[{"id":"10.13039\/501100004937","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["K\u00fcnstl Intell"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s13218-026-00902-6","type":"journal-article","created":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T15:18:25Z","timestamp":1769786305000},"page":"111-117","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["LIEREx: Language-Image Embeddings for Robotic Exploration"],"prefix":"10.1007","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9023-7528","authenticated-orcid":false,"given":"Felix","family":"Igelbrink","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2503-0316","authenticated-orcid":false,"given":"Lennart","family":"Niecksch","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7656-1670","authenticated-orcid":false,"given":"Marian","family":"Renz","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2276-3140","authenticated-orcid":false,"given":"Martin","family":"G\u00fcnther","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2480-6901","authenticated-orcid":false,"given":"Martin","family":"Atzmueller","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,1,30]]},"reference":[{"issue":"11","key":"902_CR1","doi-asserted-by":"publisher","first-page":"915","DOI":"10.1016\/j.robot.2008.08.001","volume":"56","author":"A N\u00fcchter","year":"2008","unstructured":"N\u00fcchter A, Hertzberg J (2008) Towards semantic maps for mobile robots. Robot Auton Syst 56(11):915\u2013926. https:\/\/doi.org\/10.1016\/j.robot.2008.08.001","journal-title":"Robot Auton Syst"},{"key":"902_CR2","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, et\u00a0al (2021) Learning Transferable Visual Models from Natural Language Supervision. In: International conference on machine learning (ICML). PMLR, pp 8748\u20138763"},{"key":"902_CR3","doi-asserted-by":"crossref","unstructured":"Rosinol A, Gupta A, Abate M, Shi J, Carlone L (2020) 3D Dynamic Scene Graphs: Actionable Spatial Perception with Places, Objects, and Humans. Robotics: Science and Systems (RSS)","DOI":"10.15607\/RSS.2020.XVI.079"},{"key":"902_CR4","doi-asserted-by":"crossref","unstructured":"Hughes N, Chang Y, Carlone L (2022) Hydra: a real-time spatial perception system for 3D scene graph construction and optimization. In: Robotics: science and systems (RSS). Article 50","DOI":"10.15607\/RSS.2022.XVIII.050"},{"key":"902_CR5","doi-asserted-by":"crossref","unstructured":"Liu S, Zeng Z, Ren T, Li F, Zhang H, Yang J, et\u00a0al (2024) Grounding DINO: marrying DINO with grounded pre-training for open-set object detection. In: European conference on computer vision (ECCV). Springer, pp 38\u201355","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"902_CR6","doi-asserted-by":"publisher","unstructured":"Ren T, Jiang Q, Liu S, Zeng Z, Liu W, Gao H, et\u00a0al (2024) Grounding DINO 1.5: advance the \u201cEdge\u201d of open-set object detection. https:\/\/doi.org\/10.48550\/ARXIV.2405.10300. arXiv preprint arXiv:2405.10300","DOI":"10.48550\/ARXIV.2405.10300"},{"key":"902_CR7","doi-asserted-by":"publisher","unstructured":"Zeng F, Gan W, Wang Y, Liu N, Yu PS (2023) Large language models for robotics: a survey. https:\/\/doi.org\/10.48550\/ARXIV.2311.07226. arXiv:2311.07226","DOI":"10.48550\/ARXIV.2311.07226"},{"issue":"12","key":"902_CR8","doi-asserted-by":"publisher","first-page":"5068","DOI":"10.3390\/app14125068","volume":"14","author":"D Huang","year":"2024","unstructured":"Huang D, Yan C, Li Q, Peng X (2024) From large language models to large multimodal models: a literature review. Appl Sci 14(12):5068","journal-title":"Appl Sci"},{"key":"902_CR9","doi-asserted-by":"crossref","unstructured":"Zhai X, Mustafa B, Kolesnikov A, Beyer L (2023) Sigmoid loss for language image pre-training. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 11975\u201311986","DOI":"10.1109\/ICCV51070.2023.01100"},{"key":"902_CR10","unstructured":"Li J, Li D, Xiong C, Hoi S (2022) BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: International conference on machine learning (ICML). PMLR, pp 12888\u201312900"},{"key":"902_CR11","unstructured":"Li J, Li D, Savarese S, Hoi S (2023) BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: International conference on machine learning (ICML). PMLR, pp 19730\u201319742"},{"key":"902_CR12","doi-asserted-by":"crossref","unstructured":"Zhou C, Loy CC, Dai B (2022) Extract free dense labels from CLIP. In: European conference on computer vision (ECCV). Springer, pp 696\u2013712","DOI":"10.1007\/978-3-031-19815-1_40"},{"key":"902_CR13","doi-asserted-by":"crossref","unstructured":"Ghiasi G, Gu X, Cui Y, Lin TY (2022) Scaling open-vocabulary image segmentation with image-level labels. In: European conference on computer vision (ECCV). Springer, pp 540\u2013557","DOI":"10.1007\/978-3-031-20059-5_31"},{"key":"902_CR14","unstructured":"Ding Z, Wang J, Tu Z (2023) Open-vocabulary universal image segmentation with MaskCLIP. In: International conference on machine learning (ICML), vol 202. PMLR, pp 8090\u20138102"},{"key":"902_CR15","doi-asserted-by":"crossref","unstructured":"L\u00fcddecke T, Ecker AS (2022) Image segmentation using text and image prompts. In: IEEE\/CVF conference on computer vision and pattern recognition (CVPR). IEEE, pp 7076\u20137086","DOI":"10.1109\/CVPR52688.2022.00695"},{"key":"902_CR16","doi-asserted-by":"crossref","unstructured":"Kirillov A, Mintun E, Ravi N, Mao H, Rolland C, Gustafson L, et\u00a0al (2023) Segment anything. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 4015\u20134026","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"902_CR17","doi-asserted-by":"crossref","unstructured":"Yamazaki K, Hanyu T, Vo K, Pham T, Tran M, Doretto G, et\u00a0al (2024) Open-fusion: real-time open-vocabulary 3D mapping and queryable scene representation. In: 2024 IEEE international conference on robotics and automation (ICRA). IEEE, pp 9411\u20139417","DOI":"10.1109\/ICRA57147.2024.10610193"},{"issue":"10","key":"902_CR18","doi-asserted-by":"publisher","first-page":"8921","DOI":"10.1109\/LRA.2024.3451395","volume":"9","author":"D Maggio","year":"2024","unstructured":"Maggio D, Chang Y, Hughes N, Trang M, Griffith D, Dougherty C et al (2024) Clio: real-time task-driven open-set 3D scene graphs. IEEE Robotics Autom Lett 9(10):8921\u20138928. https:\/\/doi.org\/10.1109\/LRA.2024.3451395","journal-title":"IEEE Robotics Autom Lett"},{"key":"902_CR19","unstructured":"Laina SB, Boche S, Papatheodorou S, Schaefer S, Jung J, Leutenegger S (2025) FindAnything: open-vocabulary and object-centric mapping for robot exploration in any environment. arXiv preprint arXiv:2504.08603"},{"key":"902_CR20","unstructured":"Kassab C, Mattamala M, Morin S, B\u00fcchner M, Valada A, Paull L, et\u00a0al (2024) The bare necessities: designing simple, effective open-vocabulary scene graphs. arXiv preprint arXiv:2412.01539"},{"key":"902_CR21","doi-asserted-by":"crossref","unstructured":"Linok S, Zemskova T, Ladanova S, Titkov R, Yudin D, Monastyrny M, et\u00a0al (2025) Beyond bare queries: open-vocabulary object grounding with 3D scene graph. In: IEEE international conference on robotics and automation (ICRA). IEEE, pp 13582\u201313589","DOI":"10.1109\/ICRA55743.2025.11128059"},{"key":"902_CR22","unstructured":"Zhao X, Ding W, An Y, Du Y, Yu T, Li M, et\u00a0al (2023) Fast segment anything. arXiv preprint arXiv:2306.12156"},{"key":"902_CR23","unstructured":"Zhang C, Han D, Qiao Y, Kim JU, Bae SH, Lee S, et\u00a0al (2023) Faster segment anything: towards lightweight SAM for mobile applications. arXiv preprint arXiv:2306.14289"},{"key":"902_CR24","doi-asserted-by":"crossref","unstructured":"Gadre SY, Wortsman M, Ilharco G, Schmidt L, Song S (2023) Cows on pasture: baselines and benchmarks for language-driven zero-shot object navigation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 23171\u201323181","DOI":"10.1109\/CVPR52729.2023.02219"},{"key":"902_CR25","doi-asserted-by":"crossref","unstructured":"Chang M, Gervet T, Khanna M, Yenamandra S, Shah D, Min SY, et\u00a0al (2024) GOAT: GO to any thing. In: Proceedings of robotics: science and systems (RSS). Delft, Netherlands, Article 73","DOI":"10.15607\/RSS.2024.XX.073"},{"key":"902_CR26","doi-asserted-by":"crossref","unstructured":"Ren AZ, Clark J, Dixit A, Itkina M, Majumdar A, Sadigh D (2024) Explore until confident: efficient exploration for embodied question answering. In: Proceedings of robotics: science and systems (RSS). Delft, Netherlands, Article 89","DOI":"10.15607\/RSS.2024.XX.089"},{"issue":"15","key":"902_CR27","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1016\/j.ifacol.2016.07.718","volume":"49","author":"T Gedicke","year":"2016","unstructured":"Gedicke T, G\u00fcnther M, Hertzberg J (2016) FLAP for CAOS: forward-looking active perception for clutter-aware object search. IFAC-PapersOnLine 49(15):114\u2013119","journal-title":"IFAC-PapersOnLine"},{"key":"902_CR28","doi-asserted-by":"crossref","unstructured":"Liu P, Orru Y, Vakil J, Paxton C, Shafiullah NMM, Pinto L (2024) Demonstrating OK-robot: what really matters in integrating open-knowledge models for robotics. In: Proceedings of robotics: science and systems (RSS). Delft, Netherlands, Article 91","DOI":"10.15607\/RSS.2024.XX.091"},{"key":"902_CR29","doi-asserted-by":"crossref","unstructured":"Wang C, Fang HS, Gou M, Fang H, Gao J, Lu C (2021) Graspness discovery in clutters for fast and accurate grasp detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 15964\u201315973","DOI":"10.1109\/ICCV48922.2021.01566"},{"issue":"5","key":"902_CR30","doi-asserted-by":"publisher","first-page":"3929","DOI":"10.1109\/TRO.2023.3281153","volume":"39","author":"HS Fang","year":"2023","unstructured":"Fang HS, Wang C, Fang H, Gou M, Liu J, Yan H et al (2023) AnyGrasp: robust and efficient grasp perception in spatial and temporal domains. IEEE Trans Rob 39(5):3929\u20133945","journal-title":"IEEE Trans Rob"},{"key":"902_CR31","doi-asserted-by":"crossref","unstructured":"Savva M, Kadian A, Maksymets O, Zhao Y, Wijmans E, Jain B, et\u00a0al (2019) Habitat: a platform for embodied AI research. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV); pp 9339\u20139347","DOI":"10.1109\/ICCV.2019.00943"},{"key":"902_CR32","doi-asserted-by":"crossref","unstructured":"Chang A, Dai A, Funkhouser T, Halber M, Niessner M, Savva M, et\u00a0al (2017) Matterport3D: learning from RGB-D data in indoor environments. arXiv preprint arXiv:1709.06158","DOI":"10.1109\/3DV.2017.00081"},{"key":"902_CR33","unstructured":"Ramakrishnan SK, Gokaslan A, Wijmans E, Maksymets O, Clegg A, Turner J, et\u00a0al (2021) Habitat-Matterport 3D dataset (HM3D): 1000 large-scale 3D environments for embodied AI. arXiv preprint arXiv:2109.08238"},{"key":"902_CR34","doi-asserted-by":"crossref","unstructured":"Mock A, Wiemann T, P\u00fctz S, Hertzberg J (2024) MICP-L: mesh-based ICP for robot localization using hardware-accelerated ray casting. In: 2024 IEEE\/RSJ international conference on intelligent robots and systems (IROS). IEEE, pp 10664\u201310671","DOI":"10.1109\/IROS58592.2024.10802360"}],"container-title":["KI - K\u00fcnstliche Intelligenz"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13218-026-00902-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13218-026-00902-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13218-026-00902-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T07:50:52Z","timestamp":1779436252000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13218-026-00902-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,30]]},"references-count":34,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["902"],"URL":"https:\/\/doi.org\/10.1007\/s13218-026-00902-6","relation":{},"ISSN":["0933-1875","1610-1987"],"issn-type":[{"value":"0933-1875","type":"print"},{"value":"1610-1987","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,30]]},"assertion":[{"value":"18 December 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no Conflict of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"All data used in this paper is publicly available (Matterport3D:\n                      \n                      , Habitat-Matterport:\n                      \n                      TIAGo Maps:\n                      \n                      ). Source code for the semantic mapping framework will be made available at a later date.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Data availability"}}]}}