{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:08:37Z","timestamp":1777655317752,"version":"3.51.4"},"publisher-location":"Cham","reference-count":82,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729126","type":"print"},{"value":"9783031729133","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72913-3_12","type":"book-chapter","created":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T21:45:00Z","timestamp":1733089500000},"page":"201-220","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["\u201cWhere am I?\u201d Scene Retrieval with\u00a0Language"],"prefix":"10.1007","author":[{"given":"Jiaqi","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel","family":"Barath","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Iro","family":"Armeni","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marc","family":"Pollefeys","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hermann","family":"Blum","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,2]]},"reference":[{"key":"12_CR1","unstructured":"Achiam, J., et\u00a0al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Achlioptas, P., Abdelreheem, A., Xia, F., Elhoseiny, M., Guibas, L.J.: ReferIt3D: neural listeners for fine-grained 3D object identification in real-world scenes. In: European Conference on Computer Vision (2020). https:\/\/api.semanticscholar.org\/CorpusID:221378802","DOI":"10.1007\/978-3-030-58452-8_25"},{"key":"12_CR3","unstructured":"Agia, C., et al.: TASKOGRAPHY: evaluating robot task planning over large 3D scene graphs. In: Conference on Robot Learning, pp. 46\u201358. PMLR (2022)"},{"key":"12_CR4","unstructured":"Ahn, M., et al.: Do as i can, not as i say: grounding language in robotic affordances (2022)"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Gronat, P., Torii, A., Pajdla, T., Sivic, J.: NetVLAD: CNN architecture for weakly supervised place recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5297\u20135307 (2016)","DOI":"10.1109\/CVPR.2016.572"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Armeni, I., et al.: 3D scene graph: a structure for unified semantics, 3D space, and camera (2019)","DOI":"10.1109\/ICCV.2019.00576"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Azuma, D., Miyanishi, T., Kurita, S., Kawanabe, M.: ScanQA: 3D question answering for spatial scene understanding (2022)","DOI":"10.1109\/CVPR52688.2022.01854"},{"key":"12_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"782","DOI":"10.1007\/978-3-030-01264-9_46","volume-title":"Computer Vision \u2013 ECCV 2018","author":"V Balntas","year":"2018","unstructured":"Balntas, V., Li, S., Prisacariu, V.: RelocNet: continuous metric learning relocalisation using neural nets. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018. LNCS, vol. 11218, pp. 782\u2013799. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_46"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Bernreiter, L., Ott, L., Nieto, J., Siegwart, R., Cadena, C.: Spherical multi-modal place recognition for heterogeneous sensor systems. In: 2021 IEEE International Conference on Robotics and Automation (ICRA), pp. 1743\u20131750. IEEE (2021)","DOI":"10.1109\/ICRA48506.2021.9561078"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Bhayani, S., Sattler, T., Barath, D., Beliansky, P., Heikkil\u00e4, J., Kukelova, Z.: Calibrated and partially calibrated semi-generalized homographies. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5936\u20135945 (2021)","DOI":"10.1109\/ICCV48922.2021.00588"},{"key":"12_CR11","doi-asserted-by":"publisher","unstructured":"Biswas, J., Veloso, M.: Wifi localization and navigation for autonomous indoor mobile robots. In: 2010 IEEE International Conference on Robotics and Automation, pp. 4379\u20134384 (2010). https:\/\/doi.org\/10.1109\/ROBOT.2010.5509842","DOI":"10.1109\/ROBOT.2010.5509842"},{"key":"12_CR12","doi-asserted-by":"publisher","unstructured":"Boonsriwai, S., Apavatjrut, A.: Indoor wifi localization on mobile devices. In: 2013 10th International Conference on Electrical Engineering\/Electronics, Computer, Telecommunications and Information Technology, pp.\u00a01\u20135 (2013). https:\/\/doi.org\/10.1109\/ECTICon.2013.6559592","DOI":"10.1109\/ECTICon.2013.6559592"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Brachmann, E., et al.: DSAC - differentiable RANSAC for camera localization. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.267"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Brachmann, E., Rother, C.: Learning less is more - 6D camera localization via 3D surface regression. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00489"},{"issue":"9","key":"12_CR15","first-page":"5847","volume":"44","author":"E Brachmann","year":"2021","unstructured":"Brachmann, E., Rother, C.: Visual camera re-localization from RGB and RGB-D images using DSAC. IEEE Trans. Pattern Anal. Mach. Intell. 44(9), 5847\u20135865 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"12_CR16","doi-asserted-by":"publisher","unstructured":"Brossard, M., Bonnabel, S.: Learning wheel odometry and IMU errors for localization. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 291\u2013297 (2019). https:\/\/doi.org\/10.1109\/ICRA.2019.8794237","DOI":"10.1109\/ICRA.2019.8794237"},{"key":"12_CR17","unstructured":"Brown, T.B., Mann, B., et\u00a0al.: Language models are few-shot learners (2020)"},{"key":"12_CR18","doi-asserted-by":"publisher","unstructured":"Cai, G.S., Lin, H.Y., Kao, S.F.: Mobile robot localization using GPS, IMU and visual odometry. In: 2019 International Automatic Control Conference (CACS), pp.\u00a01\u20136 (2019). https:\/\/doi.org\/10.1109\/CACS47674.2019.9024731","DOI":"10.1109\/CACS47674.2019.9024731"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Cavallari, T., Bertinetto, L., Mukhoti, J., Torr, P., Golodetz, S.: Let\u2019s take this online: adapting scene coordinate regression network predictions for online RGB-D camera relocalisation. In: 3DV (2019)","DOI":"10.1109\/3DV.2019.00068"},{"key":"12_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"202","DOI":"10.1007\/978-3-030-58565-5_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"DZ Chen","year":"2020","unstructured":"Chen, D.Z., Chang, A.X., Nie\u00dfner, M.: ScanRefer: 3D object localization in RGB-D scans using natural language. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12365, pp. 202\u2013221. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58565-5_13"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Chen, D.Z., Gholami, A., Nie\u00dfner, M., Chang, A.X.: Scan2Cap: context-aware dense captioning in RGB-D scans (2020)","DOI":"10.1109\/CVPR46437.2021.00321"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Chen, D., Lin, Y., Li, W., Li, P., Zhou, J., Sun, X.: Measuring and relieving the over-smoothing problem for graph neural networks from the topological view (2019)","DOI":"10.1609\/aaai.v34i04.5747"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Chen, Z., Maffra, F., Sa, I., Chli, M.: Only look once, mining distinctive landmarks from convnet for visual place recognition. In: 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 9\u201316 (2017)","DOI":"10.1109\/IROS.2017.8202131"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., Nie\u00dfner, M.: ScanNet: richly-annotated 3D reconstructions of indoor scenes (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Dhamo, H., Manhardt, F., Navab, N., Tombari, F.: Graph-to-3D: end-to-end generation and manipulation of 3d scenes using scene graphs. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16352\u201316361 (2021)","DOI":"10.1109\/ICCV48922.2021.01604"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Ding, M., Wang, Z., Sun, J., Shi, J., Luo, P.: CamNet: coarse-to-fine retrieval for camera re-localization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2871\u20132880 (2019)","DOI":"10.1109\/ICCV.2019.00296"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Feng, M., et al.: Free-form description guided 3d visual graph network for object grounding in point cloud (2021)","DOI":"10.1109\/ICCV48922.2021.00370"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Gadre, S.Y., Ehsani, K., Song, S., Mottaghi, R.: Continuous scene representations for embodied AI. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14849\u201314859 (2022)","DOI":"10.1109\/CVPR52688.2022.01443"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Gao, P., Liang, J., Shen, Y., Son, S., Lin, M.C.: Visual, spatial, geometric-preserved place recognition for cross-view and cross-modal collaborative perception. In: 2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 11079\u201311086. IEEE (2023)","DOI":"10.1109\/IROS55552.2023.10341898"},{"key":"12_CR30","doi-asserted-by":"crossref","unstructured":"Garg, S., Fischer, T., Milford, M.: Where is your place, visual place recognition? (2021)","DOI":"10.24963\/ijcai.2021\/603"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Garg, S., Suenderhauf, N., Milford, M.: Semantic\u2013geometric visual place recognition: a new perspective for reconciling opposing views. Int. J. Robot. Res. (2019)","DOI":"10.1177\/0278364919839761"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Germain, H., Bourmaud, G., Lepetit, V.: Sparse-to-dense hypercolumn matching for long-term visual localization. In: International Conference on 3D Vision (3DV) (2019)","DOI":"10.1109\/3DV.2019.00063"},{"key":"12_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"626","DOI":"10.1007\/978-3-030-58580-8_37","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Germain","year":"2020","unstructured":"Germain, H., Bourmaud, G., Lepetit, V.: S2DNet: learning image features for accurate sparse-to-dense matching. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12348, pp. 626\u2013643. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58580-8_37"},{"key":"12_CR34","doi-asserted-by":"crossref","unstructured":"Guo, Z., et al.: ViewRefer: grasp the multi-view knowledge for 3D visual grounding with GPT and prototype guidance (2023)","DOI":"10.1109\/ICCV51070.2023.01410"},{"issue":"2","key":"12_CR35","doi-asserted-by":"publisher","first-page":"1924","DOI":"10.1109\/LRA.2019.2898427","volume":"4","author":"S Hausler","year":"2019","unstructured":"Hausler, S., Jacobson, A., Milford, M.: Multi-process fusion: visual place recognition using multiple image processing methods. IEEE Robot. Autom. Lett. 4(2), 1924\u20131931 (2019)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Huang, S., Chen, Y., Jia, J., Wang, L.: Multi-view transformer for 3D visual grounding (2022)","DOI":"10.1109\/CVPR52688.2022.01508"},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Irschara, A., Zach, C., Frahm, J.M., Bischof, H.: From structure-from-motion point clouds to fast location recognition. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206587"},{"key":"12_CR38","unstructured":"Ji, X., Wei, J., Wang, Y., Shang, H., Kneip, L.: Cross-modal place recognition in image databases using event-based sensors. arXiv preprint arXiv:2307.01047 (2023)"},{"key":"12_CR39","doi-asserted-by":"crossref","unstructured":"Jiao, Z., Niu, Y., Zhang, Z., Zhu, S.C., Zhu, Y., Liu, H.: Sequential manipulation planning on scene graph. In: 2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 8203\u20138210. IEEE (2022)","DOI":"10.1109\/IROS47612.2022.9981735"},{"key":"12_CR40","doi-asserted-by":"crossref","unstructured":"Kendall, A., Cipolla, R.: Geometric loss functions for camera pose regression with deep learning. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.694"},{"key":"12_CR41","doi-asserted-by":"crossref","unstructured":"Kendall, A., Grimes, M., Cipolla, R.: Posenet: A convolutional network for real-time 6-DoF camera relocalization. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.336"},{"issue":"2","key":"12_CR42","doi-asserted-by":"publisher","first-page":"561","DOI":"10.1109\/TRO.2019.2956352","volume":"36","author":"A Khaliq","year":"2020","unstructured":"Khaliq, A., Ehsan, S., Chen, Z., Milford, M., McDonald-Maier, K.: A holistic visual place recognition approach using lightweight CNNs for significant viewpoint and appearance changes. IEEE Trans. Rob. 36(2), 561\u2013569 (2020)","journal-title":"IEEE Trans. Rob."},{"key":"12_CR43","doi-asserted-by":"crossref","unstructured":"Kolmet, M., Zhou, Q., Osep, A., Leal-Taixe, L.: Text2Pos: text-to-point-cloud cross-modal localization (2022)","DOI":"10.1109\/CVPR52688.2022.00657"},{"key":"12_CR44","doi-asserted-by":"publisher","unstructured":"Kotaru, M., Joshi, K., Bharadia, D., Katti, S.: SpotFi: decimeter level localization using WiFi. In: Proceedings of the 2015 ACM Conference on Special Interest Group on Data Communication, SIGCOMM 2015, pp. 269\u2013282. Association for Computing Machinery, New York (2015). https:\/\/doi.org\/10.1145\/2785956.2787487","DOI":"10.1145\/2785956.2787487"},{"issue":"4","key":"12_CR45","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1007\/s11370-022-00428-4","volume":"15","author":"Y Li","year":"2022","unstructured":"Li, Y., Ma, Y., Huo, X., Wu, X.: Remote object navigation for service robots using hierarchical knowledge graph in human-centered environments. Intel. Serv. Robot. 15(4), 459\u2013473 (2022)","journal-title":"Intel. Serv. Robot."},{"key":"12_CR46","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-642-33718-5_2","volume-title":"Computer Vision \u2013 ECCV 2012","author":"Y Li","year":"2012","unstructured":"Li, Y., Snavely, N., Huttenlocher, D., Fua, P.: Worldwide pose estimation using 3D point clouds. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7572, pp. 15\u201329. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33718-5_2"},{"key":"12_CR47","doi-asserted-by":"crossref","unstructured":"Liu, L., Li, H., Dai, Y.: Efficient global 2D-3D matching for camera localization in a large-scale 3D map. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.260"},{"issue":"22","key":"12_CR48","doi-asserted-by":"publisher","first-page":"8821","DOI":"10.3390\/s22228821","volume":"22","author":"W Liu","year":"2022","unstructured":"Liu, W., Qin, C., Deng, Z., Jiang, H.: LRF-WiVi: a WiFi and visual indoor localization method based on low-rank fusion. Sensors 22(22), 8821 (2022). https:\/\/doi.org\/10.3390\/s22228821","journal-title":"Sensors"},{"issue":"9","key":"12_CR49","doi-asserted-by":"publisher","first-page":"1061","DOI":"10.1177\/0278364920931151","volume":"39","author":"S Lynen","year":"2020","unstructured":"Lynen, S., et al.: Large-scale, real-time visual-inertial localization revisited. Int. J. Robot. Res. (IJRR) 39(9), 1061\u20131084 (2020)","journal-title":"Int. J. Robot. Res. (IJRR)"},{"key":"12_CR50","unstructured":"Ma, X., et al.: SQA3D: situated question answering in 3D scenes (2023)"},{"key":"12_CR51","doi-asserted-by":"crossref","unstructured":"Matsuzaki, S., et al.: CLIP-Loc: multi-modal landmark association for global localization in object-based maps (2024)","DOI":"10.1109\/ICRA57147.2024.10611393"},{"key":"12_CR52","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space (2013)"},{"key":"12_CR53","doi-asserted-by":"publisher","unstructured":"Mikov, A., Panyov, A., Kosyanchuk, V., Prikhodko, I.: Sensor fusion for land vehicle localization using inertial mems and odometry. In: 2019 IEEE International Symposium on Inertial Sensors and Systems (INERTIAL), pp.\u00a01\u20132 (2019). https:\/\/doi.org\/10.1109\/ISISS.2019.8739427","DOI":"10.1109\/ISISS.2019.8739427"},{"key":"12_CR54","unstructured":"Moreau, A., Piasco, N., Tsishkou, D., Stanciulescu, B., de\u00a0La\u00a0Fortelle, A.: LENS: localization enhanced by nerf synthesis. In: CoRL (2021)"},{"key":"12_CR55","doi-asserted-by":"crossref","unstructured":"Pion, N., Humenberger, M., Csurka, G., Cabon, Y., Sattler, T.: Benchmarking image retrieval for visual localization. In: 2020 International Conference on 3D Vision (3DV), pp. 483\u2013494. IEEE (2020)","DOI":"10.1109\/3DV50981.2020.00058"},{"key":"12_CR56","doi-asserted-by":"publisher","unstructured":"Pion, N., Humenberger, M., Csurka, G., Cabon, Y., Sattler, T.: Benchmarking image retrieval for visual localization. In: 2020 International Conference on 3D Vision (3DV), pp. 483\u2013494 (2020). https:\/\/doi.org\/10.1109\/3DV50981.2020.00058","DOI":"10.1109\/3DV50981.2020.00058"},{"issue":"23","key":"12_CR57","doi-asserted-by":"publisher","first-page":"5084","DOI":"10.3390\/s19235084","volume":"19","author":"A Poulose","year":"2019","unstructured":"Poulose, A., Han, D.S.: Hybrid indoor localization using IMU sensors and smartphone camera. Sensors 19(23), 5084 (2019). https:\/\/doi.org\/10.3390\/s19235084","journal-title":"Sensors"},{"key":"12_CR58","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision (2021)"},{"key":"12_CR59","doi-asserted-by":"crossref","unstructured":"Rajvanshi, A., et al.: SayNav: grounding large language models for dynamic planning to navigation in new environments (2023)","DOI":"10.1609\/icaps.v34i1.31506"},{"key":"12_CR60","unstructured":"Rana, K., Haviland, J., Garg, S., Abou-Chakra, J., Reid, I., Suenderhauf, N.: SayPlan: grounding large language models using 3D scene graphs for scalable robot task planning (2023)"},{"key":"12_CR61","unstructured":"Ravichandran, Z., Griffith, J.D., Smith, B., Frost, C.: Bridging scene understanding and task execution with flexible simulation environments. arXiv preprint arXiv:2011.10452 (2020)"},{"key":"12_CR62","unstructured":"Roh, J., Desingh, K., Farhadi, A., Fox, D.: LanguageRefer: spatial-language model for 3D visual grounding (2021)"},{"key":"12_CR63","unstructured":"Rusch, T.K., Bronstein, M.M., Mishra, S.: A survey on oversmoothing in graph neural networks (2023)"},{"key":"12_CR64","doi-asserted-by":"crossref","unstructured":"Sarlin, P.E., Cadena, C., Siegwart, R., Dymczyk, M.: From coarse to fine: robust hierarchical localization at large scale. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01300"},{"key":"12_CR65","doi-asserted-by":"crossref","unstructured":"Sarlin, P.E., DeTone, D., Malisiewicz, T., Rabinovich, A.: SuperGlue: learning feature matching with graph neural networks (2019)","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"12_CR66","doi-asserted-by":"crossref","unstructured":"Sarlin, P.E., et\u00a0al.: Back to the feature: learning robust camera localization from pixels to pose. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3247\u20133257 (2021)","DOI":"10.1109\/CVPR46437.2021.00326"},{"key":"12_CR67","doi-asserted-by":"crossref","unstructured":"Sattler, T., Leibe, B., Kobbelt, L.: Efficient & effective prioritized matching for large-scale image-based localization. PAMI (2017)","DOI":"10.1109\/TPAMI.2016.2611662"},{"key":"12_CR68","doi-asserted-by":"crossref","unstructured":"Sattler, T., Zhou, Q., Pollefeys, M., Leal-Taixe, L.: Understanding the limitations of CNN-based absolute camera pose regression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3302\u20133312 (2019)","DOI":"10.1109\/CVPR.2019.00342"},{"key":"12_CR69","doi-asserted-by":"crossref","unstructured":"Sch\u00f6nberger, J.L., Pollefeys, M., Geiger, A., Sattler, T.: Semantic visual localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6896\u20136906 (2018)","DOI":"10.1109\/CVPR.2018.00721"},{"key":"12_CR70","doi-asserted-by":"crossref","unstructured":"Shi, Y., Huang, Z., Feng, S., Zhong, H., Wang, W., Sun, Y.: Masked label prediction: Unified message passing model for semi-supervised classification (2021)","DOI":"10.24963\/ijcai.2021\/214"},{"issue":"7","key":"12_CR71","doi-asserted-by":"publisher","first-page":"1455","DOI":"10.1109\/TPAMI.2016.2598331","volume":"39","author":"L Svarm","year":"2017","unstructured":"Svarm, L., Enqvist, O., Kahl, F., Oskarsson, M.: City-scale localization for cameras with known vertical direction. PAMI 39(7), 1455\u20131461 (2017)","journal-title":"PAMI"},{"key":"12_CR72","doi-asserted-by":"crossref","unstructured":"Valentin, J., Nie\u00dfner, M., Shotton, J., Fitzgibbon, A., Izadi, S., Torr, P.: Exploiting uncertainty in regression forests for accurate camera relocalization. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299069"},{"key":"12_CR73","doi-asserted-by":"crossref","unstructured":"Walch, F., Hazirbas, C., Leal-Taixe, L., Sattler, T., Hilsenbeck, S., Cremers, D.: Image-based localization using LSTMs for structured feature correlation. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.75"},{"key":"12_CR74","doi-asserted-by":"crossref","unstructured":"Wald, J., Avetisyan, A., Navab, N., Tombari, F., Niessner, M.: RIO: 3D object instance re-localization in changing indoor environments. In: Proceedings IEEE International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00775"},{"key":"12_CR75","doi-asserted-by":"crossref","unstructured":"Wald, J., Dhamo, H., Navab, N., Tombari, F.: Learning 3D semantic scene graphs from 3D indoor reconstructions (2020)","DOI":"10.1109\/CVPR42600.2020.00402"},{"key":"12_CR76","doi-asserted-by":"crossref","unstructured":"Wang, G., Fan, H., Kankanhalli, M.: Text to point cloud localization with relation-enhanced transformer (2023)","DOI":"10.1609\/aaai.v37i2.25347"},{"key":"12_CR77","doi-asserted-by":"crossref","unstructured":"Wang, H., Zhang, C., Yu, J., Cai, W.: Spatiality-guided transformer for 3D dense captioning on point clouds (2022)","DOI":"10.24963\/ijcai.2022\/194"},{"key":"12_CR78","doi-asserted-by":"crossref","unstructured":"Xia, Y., Shi, L., Ding, Z., Henriques, J.F., Cremers, D.: Text2Loc: 3D point cloud localization from natural language (2023)","DOI":"10.1109\/CVPR52733.2024.01417"},{"key":"12_CR79","doi-asserted-by":"crossref","unstructured":"Zeisl, B., Sattler, T., Pollefeys, M.: Camera pose voting for large-scale image-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2704\u20132712 (2015)","DOI":"10.1109\/ICCV.2015.310"},{"key":"12_CR80","doi-asserted-by":"crossref","unstructured":"Zhang, W., Kosecka, J.: Image based localization in urban environments. In: International Symposium on 3D Data Processing, Visualization, and Transmission (2006)","DOI":"10.1109\/3DPVT.2006.80"},{"key":"12_CR81","doi-asserted-by":"crossref","unstructured":"Zheng, E., Wu, C.: Structure from motion using structure-less resection. In: The IEEE International Conference on Computer Vision (ICCV) (2015)","DOI":"10.1109\/ICCV.2015.240"},{"key":"12_CR82","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Ma, X., Chen, Y., Deng, Z., Huang, S., Li, Q.: 3D-VisTA: pre-trained transformer for 3D vision and text alignment (2023)","DOI":"10.1109\/ICCV51070.2023.00272"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72913-3_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T23:24:04Z","timestamp":1733095444000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72913-3_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"ISBN":["9783031729126","9783031729133"],"references-count":82,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72913-3_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,2]]},"assertion":[{"value":"2 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}