{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:54:03Z","timestamp":1781538843678,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2025ZD0122903"],"award-info":[{"award-number":["2025ZD0122903"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Natural Science Foundation of China","award":["U25A20533"],"award-info":[{"award-number":["U25A20533"]}]},{"name":"National Natural Science Foundation of China","award":["62276129"],"award-info":[{"award-number":["62276129"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62506164"],"award-info":[{"award-number":["62506164"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20250082"],"award-info":[{"award-number":["BK20250082"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation of Jiangsu Province","award":["BK20251387"],"award-info":[{"award-number":["BK20251387"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["NE2025010"],"award-info":[{"award-number":["NE2025010"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100018537","name":"Postdoctoral Fellowship Program of CPSF","doi-asserted-by":"publisher","award":["GZC20252739"],"award-info":[{"award-number":["GZC20252739"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Jiangsu Funding Program for Excellent Postdoctoral Talent","award":["2025ZB489"],"award-info":[{"award-number":["2025ZB489"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810883","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"865-873","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["PRISM: Preference-Guided Semantic Reasoning with Vision-Language Models for Object Goal Navigation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-2848-2332","authenticated-orcid":false,"given":"Zijian","family":"Liu","sequence":"first","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5959-4294","authenticated-orcid":false,"given":"Cong","family":"Pan","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3574-4213","authenticated-orcid":false,"given":"Chengjie","family":"Fan","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7290-2081","authenticated-orcid":false,"given":"Wanjie","family":"Cai","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5008-3986","authenticated-orcid":false,"given":"Xichen","family":"Ding","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0306-534X","authenticated-orcid":false,"given":"Jie","family":"Qin","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Peter Anderson Angel Chang Devendra\u00a0Singh Chaplot Alexey Dosovitskiy Saurabh Gupta Vladlen Koltun Jana Kosecka Jitendra Malik Roozbeh Mottaghi Manolis Savva et\u00a0al. 2018. On evaluation of embodied navigation agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1807.06757 (2018)."},{"key":"e_1_3_3_1_3_2","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et\u00a0al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.13923 (2025)."},{"key":"e_1_3_3_1_4_2","unstructured":"Dhruv Batra Aaron Gokaslan Aniruddha Kembhavi Oleksandr Maksymets Roozbeh Mottaghi Manolis Savva Alexander Toshev and Erik Wijmans. 2020. Objectnav revisited: On evaluation of embodied agents navigating to objects. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2006.13171 (2020)."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610499"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Angel Chang Angela Dai Thomas Funkhouser Maciej Halber Matthias Niessner Manolis Savva Shuran Song Andy Zeng and Yinda Zhang. 2017. Matterport3d: Learning from rgb-d data in indoor environments. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1709.06158 (2017).","DOI":"10.1109\/3DV.2017.00081"},{"key":"e_1_3_3_1_7_2","unstructured":"Devendra\u00a0Singh Chaplot Dhiraj\u00a0Prakashchand Gandhi Abhinav Gupta and Russ\u00a0R Salakhutdinov. 2020. Object goal navigation using goal-oriented semantic exploration. Advances in Neural Information Processing Systems 33 (2020) 4247\u20134258."},{"key":"e_1_3_3_1_8_2","first-page":"12875","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"Chaplot Devendra\u00a0Singh","year":"2020","unstructured":"Devendra\u00a0Singh Chaplot, Ruslan Salakhutdinov, Abhinav Gupta, and Saurabh Gupta. 2020. Neural topological slam for visual navigation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 12875\u201312884."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01501"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Matt Deitke Eli VanderBilt Alvaro Herrasti Luca Weihs Kiana Ehsani Jordi Salvador Winson Han Eric Kolve Aniruddha Kembhavi and Roozbeh Mottaghi. 2022. ProcTHOR: Large-Scale Embodied AI Using Procedural Generation. Advances in Neural Information Processing Systems 35 (2022) 5982\u20135994.","DOI":"10.52202\/068431-0433"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v40i22.38885"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00252"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58571-6_2"},{"key":"e_1_3_3_1_14_2","unstructured":"Chengjie Fan Cong Pan Zijian Liu Ningzhong Liu and Jie Qin. 2026. HTNav: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2604.08883 (2026)."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02219"},{"key":"e_1_3_3_1_16_2","unstructured":"Dylan Goetting Himanshu\u00a0Gaurav Singh and Antonio Loquercio. 2024. End-to-end navigation with vision language models: Transforming spatial reasoning into question-answering. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.05755 (2024)."},{"key":"e_1_3_3_1_17_2","unstructured":"Xianda Guo Ruijun Zhang Yiqun Duan Yuhang He Dujun Nie Wenke Huang Chenming Zhang Shuai Liu Hao Zhao and Long Chen. 2024. SURDS: Benchmarking Spatial Understanding and Reasoning in Driving Scenarios with Vision Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.13112 (2024)."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01441"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01441"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00997"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Arjun Majumdar Gunjan Aggarwal Bhavika Devnani Judy Hoffman and Dhruv Batra. 2022. Zson: Zero-shot object-goal navigation using multimodal goal embeddings. Advances in Neural Information Processing Systems 35 (2022) 32340\u201332352.","DOI":"10.52202\/068431-2343"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01509"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Dujun Nie Xianda Guo Yiqun Duan Ruijun Zhang and Long Chen. 2025. Wmnav: Integrating vision-language models into world models for object goal navigation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.02247 (2025).","DOI":"10.1109\/IROS60139.2025.11246684"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00925"},{"key":"e_1_3_3_1_26_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01832"},{"key":"e_1_3_3_1_28_2","unstructured":"Santhosh\u00a0K Ramakrishnan Aaron Gokaslan Erik Wijmans Oleksandr Maksymets Alex Clegg John Turner Eric Undersander Wojciech Galuba Andrew Westbury Angel\u00a0X Chang et\u00a0al. 2021. Habitat-matterport 3d dataset (hm3d): 1000 large-scale 3d environments for embodied ai. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.08238 (2021)."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"e_1_3_3_1_31_2","first-page":"2683","volume-title":"Conference on Robot Learning","author":"Shah Dhruv","year":"2023","unstructured":"Dhruv Shah, Michael\u00a0Robert Equi, B\u0142a\u017cej Osi\u0144ski, Fei Xia, Brian Ichter, and Sergey Levine. 2023. Navigation with large language models: Semantic guesswork as a heuristic for planning. In Conference on Robot Learning. PMLR, 2683\u20132699."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Jingwen Sun Jing Wu Ze Ji and Yu-Kun Lai. 2024. A survey of object goal navigation. IEEE Transactions on Automation Science and Engineering 22 (2024) 2292\u20132308.","DOI":"10.1109\/TASE.2024.3378010"},{"key":"e_1_3_3_1_33_2","first-page":"161","volume-title":"European Conference on Computer Vision","author":"Sun Xinyu","year":"2024","unstructured":"Xinyu Sun, Lizhao Liu, Hongyan Zhi, Ronghe Qiu, and Junwei Liang. 2024. Prioritized semantic learning for zero-shot instance navigation. In European Conference on Computer Vision. Springer, 161\u2013178."},{"key":"e_1_3_3_1_34_2","unstructured":"Christian Szegedy Wei Liu Yangqing Jia Pierre Sermanet Scott Reed Dragomir Anguelov Dumitru Erhan Vincent Vanhoucke and Andrew Rabinovich. 2015. Proceedings of the IEEE conference on computer vision and pattern recognition. 9\u00a0pages."},{"key":"e_1_3_3_1_35_2","unstructured":"Andrew Szot Alexander Clegg Eric Undersander Erik Wijmans Yili Zhao John Turner Noah Maestre Mustafa Mukadam Devendra\u00a0Singh Chaplot Oleksandr Maksymets et\u00a0al. 2021. Habitat 2.0: Training home assistants to rearrange their habitat. Advances in neural information processing systems 34 (2021) 251\u2013266."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00896"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01432"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00691"},{"key":"e_1_3_3_1_39_2","unstructured":"Pengying Wu Yao Mu Bingxian Wu Yi Hou Ji Ma Shanghang Zhang and Chang Liu. 2024. Voronav: Voronoi-based zero-shot object navigation with large language model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.02695 (2024)."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"e_1_3_3_1_41_2","unstructured":"Wei Yang Xiaolong Wang Ali Farhadi Abhinav Gupta and Roozbeh Mottaghi. 2018. Visual Semantic Navigation using Scene Priors. Cornell University - arXiv Cornell University - arXiv (Oct 2018)."},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01388"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01775"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610712"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10342512"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01003"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_18"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"crossref","unstructured":"Sixian Zhang Xinhang Song Yubing Bai Weijie Li Yakui Chu and Shuqiang Jiang. 2021. Hierarchical Object-to-Zone Graph for Object Navigation. Cornell University - arXiv Cornell University - arXiv (Sep 2021).","DOI":"10.1109\/ICCV48922.2021.01485"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01553"},{"key":"e_1_3_3_1_50_2","unstructured":"Xinxin Zhao Wenzhe Cai Likun Tang and Teng Wang. 2024. Imaginenav: Prompting vision-language models as embodied navigator through scene imagination. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.09874 (2024)."},{"key":"e_1_3_3_1_51_2","unstructured":"Linqing Zhong Chen Gao Zihan Ding Yue Liao Huimin Ma Shifeng Zhang Xu Zhou and Si Liu. 2024. Topv-nav: Unlocking the top-view spatial reasoning potential of mllm for zero-shot object navigation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.16425 (2024)."},{"key":"e_1_3_3_1_52_2","first-page":"42829","volume-title":"International Conference on Machine Learning","author":"Zhou Kaiwen","year":"2023","unstructured":"Kaiwen Zhou, Kaizhi Zheng, Connor Pryor, Yilin Shen, Hongxia Jin, Lise Getoor, and Xin\u00a0Eric Wang. 2023. Esc: Exploration with soft commonsense constraints for zero-shot object navigation. In International Conference on Machine Learning. PMLR, 42829\u201342842."},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981766"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:57:12Z","timestamp":1781535432000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810883"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":52,"alternative-id":["10.1145\/3805622.3810883","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810883","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}