{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T21:35:10Z","timestamp":1770845710731,"version":"3.50.1"},"reference-count":51,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T00:00:00Z","timestamp":1759622400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,5]],"date-time":"2025-10-05T00:00:00Z","timestamp":1759622400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,5]]},"DOI":"10.1109\/smc58881.2025.11343475","type":"proceedings-article","created":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T20:54:44Z","timestamp":1769633684000},"page":"5153-5160","source":"Crossref","is-referenced-by-count":0,"title":["FusionNav: Enhancing Zero-Shot Object-Goal Navigation via 3D Semantic Fusion and Farsight Value Reasoning"],"prefix":"10.1109","author":[{"given":"Shugao","family":"Liu","sequence":"first","affiliation":[{"name":"Chinese Academy of Sciences,Institute of Automation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qichao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences,Institute of Automation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoran","family":"Li","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences,Institute of Automation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences,Institute of 
Automation,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Zson: Zero-shot object-goal navigation using multimodal goal embeddings","author":"Majumdar","year":"2022","journal-title":"Neural Information Processing Systems (NeurIPS)"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160969"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.066"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10610193"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2010324.1964929"},{"key":"ref6","article-title":"Weakly supervised 3d open-vocabulary segmentation","author":"Liu","year":"2023"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10610243"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3482190"},{"key":"ref9","first-page":"368","article-title":"Navigation instruction generation with bev perception and large language models","volume-title":"Computer Vision \u2013 ECCV 2024: 18th European Conference, Milan, Italy, September 29 \u2013 October 4, 2024, Proceedings, Part XXII","author":"Fan"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610712"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989538"},{"key":"ref12","article-title":"Habitat-matterport 3d dataset (HM3d): 1000 large-scale 3d environments for embodied AI","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)","author":"Ramakrishnan"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICME57554.2024.10687514"},{"key":"ref15","article-title":"Language-driven semantic segmentation","volume-title":"International Conference on Learning 
Representations","author":"Li"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref17","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International Conference on Machine Learning (ICML)","author":"Radford"},{"key":"ref18","article-title":"Segment everything everywhere all at once","author":"Zou","year":"2023","journal-title":"Neural Information Processing Systems (NeurIPS)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2024.3383158"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2927869"},{"key":"ref21","first-page":"4283","article-title":"Semantic visual navigation by watching youtube videos","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Chang","year":"2020"},{"key":"ref22","article-title":"ProcTHOR: Large-Scale Embodied AI Using Procedural Generation","author":"Deitke","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref23","article-title":"DD-PPO: Learning near-perfect pointgoal navigators from 2.5 billion frames","volume-title":"International Conference on Learning Representations (ICLR)","author":"Wijmans"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2314"},{"key":"ref26","article-title":"Poliformer: Scaling on-policy rl with transformers results in masterful navigators","volume-title":"Proceedings of the Conference on Robot Learning (CoRL)","author":"Zeng"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/cira.1997.613851"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02219"},{"key":"ref29","article-title":"Esc: exploration with soft commonsense constraints for zero-shot object navigation","volume-title":"Proceedings of the 40th International Conference on Machine 
Learning","author":"Zhou"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"ref31","article-title":"Debertav3: Improving deberta using electra-style pre-training with gradient-disentangled embedding sharing","author":"He","year":"2023"},{"key":"ref32","article-title":"Hinge-loss markov random fields and probabilistic soft logic","author":"Bach","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref33","article-title":"Sg-nav: Online 3d scene graph prompting for llm-based zero-shot object navigation","author":"Yin","year":"2024"},{"key":"ref34","article-title":"Grounded sam: Assembling open-world models for diverse visual tasks","author":"Ren","year":"2024"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICME59968.2025.11208899"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2025.3543364"},{"key":"ref37","article-title":"Blip-2: bootstrapping language-image pre-training with frozen image encoders and large language models","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Li"},{"key":"ref38","first-page":"127","article-title":"Kinectfusion: Real-time dense surface mapping and tracking","volume-title":"ISMAR \u201911 Proceedings of the 2011 10th IEEE International Symposium on Mixed and Augmented Reality.","author":"Newcombe"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/361002.361007"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52729.2023.00721"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref43","article-title":"Faster segment anything: Towards lightweight sam for mobile applications","author":"Zhang","year":"2023"},{"key":"ref44","article-title":"Habitat 2.0: Training home assistants to rearrange their 
habitat","author":"Szot","year":"2021","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref45","article-title":"Habitat challenge 2023","author":"Yadav","year":"2023"},{"key":"ref46","article-title":"ObjectNav Revisited: On Evaluation of Embodied Agents Navigating to Objects","author":"Batra","year":"2020"},{"key":"ref47","article-title":"On evaluation of embodied navigation agents","author":"Anderson","year":"2018"},{"key":"ref48","article-title":"Integrating egocentric localization for more realistic point-goal navigation agents","author":"Datta","year":"2020"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01832"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610499"},{"key":"ref51","article-title":"Voronav: Voronoi-based zero-shot object navigation with large language model","author":"Wu","year":"2024"}],"event":{"name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","location":"Vienna, Austria","start":{"date-parts":[[2025,10,5]]},"end":{"date-parts":[[2025,10,8]]}},"container-title":["2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11342430\/11342431\/11343475.pdf?arnumber=11343475","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T20:51:28Z","timestamp":1770843088000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11343475\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,5]]},"references-count":51,"URL":"https:\/\/doi.org\/10.1109\/smc58881.2025.11343475","relation":{},"subject":[],"published":{"date-parts":[[2025,10,5]]}}}