{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T17:11:08Z","timestamp":1775841068060,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","funder":[{"name":"New Generation Artificial Intelligence-National Science and Technology Major Project","award":["2025ZD0122801"],"award-info":[{"award-number":["2025ZD0122801"]}]},{"name":"National Natural Science Foundation of China, Cell Therapy Designed by Artificial Intelligence","award":["62525604"],"award-info":[{"award-number":["62525604"]}]},{"name":"National Natural Science Foundation of China","award":["62276063"],"award-info":[{"award-number":["62276063"]}]},{"name":"National Natural Science Foundation of China","award":["U23B2057"],"award-info":[{"award-number":["U23B2057"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3774904.3792591","type":"proceedings-article","created":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T21:54:34Z","timestamp":1775771674000},"page":"1410-1421","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["WPIS: From In-the-Wild Web Images to Physics-Aware 3D Scene Graphs for Physical Reasoning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2085-5028","authenticated-orcid":false,"given":"Ke","family":"Ma","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, Hubei, China and College of Design and Innovation, Tongji University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2436-9086","authenticated-orcid":false,"given":"Cong","family":"Fu","sequence":"additional","affiliation":[{"name":"School of Software and Engineering, Huazhong University of Science and Technology, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7251-9863","authenticated-orcid":false,"given":"Jianing","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Software and Engineering, Huazhong University of Science and Technology, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5800-2098","authenticated-orcid":false,"given":"Yifei","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Software and Engineering, Huazhong University of Science and Technology, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0008-9895","authenticated-orcid":false,"given":"Wenyuan","family":"Li","sequence":"additional","affiliation":[{"name":"School of Software and Engineering, Huazhong University of Science and Technology, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6732-7823","authenticated-orcid":false,"given":"Xinggang","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Electronic Information and Communications, Huazhong University of Science and Technology, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2293-1709","authenticated-orcid":false,"given":"Meng","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Design and Innovation, Tongji University, Shanghai, China and Shanghai Institute for Intelligent Autonomous Systems, Tongji University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1721-6126","authenticated-orcid":false,"given":"Tian","family":"Xia","sequence":"additional","affiliation":[{"name":"School of Software and Engineering, Huazhong University of Science and Technology, Wuhan, Hubei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,4,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Conference on Robot Learning (CoRL).","author":"Agia Christopher","year":"2022","unstructured":"Christopher Agia, Krishna Murthy Jatavallabhula, Mohamed Khodeir, Ondrej Miksik, Vibhav Vineet, Mustafa Mukadam, Liam Paull, and Florian Shkurti. 2022. Taskography: Evaluating Robot Task Planning over Large 3D Scene Graphs. In Conference on Robot Learning (CoRL)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00576"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06792-0"},{"key":"e_1_3_2_1_4_1","volume-title":"ContactPose: A Dataset of Grasps with Object Contact and Hand Pose. In European Conference on Computer Vision (ECCV). 361-378","author":"Brahmbhatt Samarth","year":"2020","unstructured":"Samarth Brahmbhatt, Chengcheng Tang, Christopher D. Twigg, Charles C. Kemp, and James Hays. 2020. ContactPose: A Dataset of Grasps with Object Contact and Hand Pose. In European Conference on Computer Vision (ECCV). 361-378."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-2442-2"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Yiqin Cai Cong Huang Junwei Yang Shikai Tang Ming Sun Baoxin Zhou and Yifan Liu. 2023. Consistent Depth Prediction for Transparent Object Reconstruction from RGB-D Camera. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). https:\/\/openaccess.thecvf.com\/content\/ICCV2023\/papers\/Cai_Consistent_Depth_Prediction_for_Transparent_Object_Reconstruction_from_RGB-D_Camera_ICCV_2023_paper.pdf","DOI":"10.1109\/ICCV51070.2023.00320"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00893"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20074-8_22"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01377"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00182"},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Intelligent Autonomous Systems. 210-221","author":"Do Chau","year":"2018","unstructured":"Chau Do and Wolfram Burgard. 2018. Accurate Pouring with an Autonomous Robot Using an RGB-D Camera. In International Conference on Intelligent Autonomous Systems. 210-221."},{"key":"e_1_3_2_1_14_1","volume-title":"WorldScore: A Unified Evaluation Benchmark for World Generation. arXiv preprint arXiv:2504.00983","author":"Duan Haoyi","year":"2025","unstructured":"Haoyi Duan, Hong-Xing Yu, Sirui Chen, Li Fei-Fei, and Jiajun Wu. 2025. WorldScore: A Unified Evaluation Benchmark for World Generation. arXiv preprint arXiv:2504.00983 (2025)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1021\/acscentsci.0c00460"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.isci.2023.107243"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/VL\/HCC60511.2024.00020"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714238"},{"key":"e_1_3_2_1_19_1","volume-title":"Bipasha Sen, Aditya Agarwal, Corban Rivera, William Paul, Kirsty Ellis, Rama Chellappa, et al.","author":"Gu Qiao","year":"2024","unstructured":"Qiao Gu, Ali Kuwajerwala, Sacha Morin, Krishna Murthy Jatavallabhula, Bipasha Sen, Aditya Agarwal, Corban Rivera, William Paul, Kirsty Ellis, Rama Chellappa, et al., 2024. Conceptgraphs: Open-vocabulary 3d scene graphs for perception and planning. (2024), 5021-5028."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_2"},{"key":"e_1_3_2_1_21_1","volume-title":"European Conference on Computer Vision. Springer, 577-594","author":"Hoda\u0148 Tom\u00e1\u0161","year":"2020","unstructured":"Tom\u00e1\u0161 Hoda\u0148, Martin Sundermeyer, Bertram Drost, Yann Labb\u00e9, Eric Brachmann, Frank Michel, Carsten Rother, and Ji\u0159\u00ed Matas. 2020. BOP challenge 2020 on 6D object localization. In European Conference on Computer Vision. Springer, 577-594."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.3389\/fbioe.2020.571777"},{"key":"e_1_3_2_1_23_1","volume-title":"Hydra: A Real-time Spatial Perception System for 3D Scene Graph Construction and Optimization. arXiv preprint arXiv:2201.13360","author":"Hughes Nathan","year":"2022","unstructured":"Nathan Hughes, Yun Chang, and Luca Carlone. 2022a. Hydra: A Real-time Spatial Perception System for 3D Scene Graph Construction and Optimization. arXiv preprint arXiv:2201.13360 (2022). https:\/\/arxiv.org\/abs\/2201.13360"},{"key":"e_1_3_2_1_24_1","volume-title":"Hydra: A Real-time Spatial Perception System for 3D Scene Graph Construction and Optimization. In Robotics: Science and Systems (RSS). https:\/\/www.roboticsproceedings.org\/rss18\/p050.pdf","author":"Hughes Nathan","year":"2022","unstructured":"Nathan Hughes, Yun Chang, and Luca Carlone. 2022b. Hydra: A Real-time Spatial Perception System for 3D Scene Graph Construction and Optimization. In Robotics: Science and Systems (RSS). https:\/\/www.roboticsproceedings.org\/rss18\/p050.pdf"},{"key":"e_1_3_2_1_25_1","volume-title":"Segment Anything. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 4015-4026","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Doll\u00e1r, and Ross Girshick. 2023. Segment Anything. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 4015-4026."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01345"},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3694978"},{"key":"e_1_3_2_1_29_1","volume-title":"Visual Instruction Tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual Instruction Tuning. Advances in neural information processing systems, Vol. 36 (2023), 34892-34916."},{"key":"e_1_3_2_1_30_1","volume-title":"Monocular Depth Estimation and Segmentation for Transparent Object with Iterative Semantic and Geometric Fusion. arXiv preprint arXiv:2502.14616","author":"Liu Jiangyuan","year":"2025","unstructured":"Jiangyuan Liu, Hongxuan Ma, Yuxin Guo, Yuhao Zhao, Chi Zhang, Wei Sui, and Wei Zou. 2025. Monocular Depth Estimation and Segmentation for Transparent Object with Iterative Semantic and Geometric Fusion. arXiv preprint arXiv:2502.14616 (2025)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Shilong Liu Zhaoyang Zeng Tianhe Ren Feng Li Hao Zhang Jie Yang Qing Jiang Chunyuan Li Jianwei Yang Hang Su et al. 2024b. Grounding dino: Marrying dino with grounded pre-training for open-set object detection. (2024) 38-55.","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109818"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-11051-2_93"},{"key":"e_1_3_2_1_34_1","volume-title":"Phys-Liquid: A Physics-Informed Dataset for Estimating 3D Geometry and","author":"Ma Ke","year":"2025","unstructured":"Ke Ma, Yizhou Fang, Jean-Baptiste Weibel, Shuai Tan, Xinggang Wang, Yang Xiao, Yi Fang, and Tian Xia. 2025. Phys-Liquid: A Physics-Informed Dataset for Estimating 3D Geometry and Volume of Transparent Deformable Liquids. arXiv preprint arXiv:2511.11077 (2025)."},{"key":"e_1_3_2_1_35_1","volume-title":"Clio: Real-time task-driven open-set 3d scene graphs","author":"Maggio Dominic","year":"2024","unstructured":"Dominic Maggio, Yun Chang, Nathan Hughes, Matthew Trang, Dan Griffith, Carlyn Dougherty, Eric Cristofalo, Lukas Schmid, and Luca Carlone. 2024. Clio: Real-time task-driven open-set 3d scene graphs. IEEE Robotics and Automation Letters (2024)."},{"key":"e_1_3_2_1_36_1","volume-title":"Learn from global correlations: Enhancing evolutionary algorithm via spectral gnn. arXiv preprint arXiv:2412.17629","author":"Ouyang Kaichen","year":"2024","unstructured":"Kaichen Ouyang, Zong Ke, Shengwei Fu, Lingjie Liu, Puning Zhao, and Dayu Hu. 2024. Learn from global correlations: Enhancing evolutionary algorithm via spectral gnn. arXiv preprint arXiv:2412.17629 (2024)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25293"},{"key":"e_1_3_2_1_38_1","volume-title":"NTIRE 2025 challenge on hr depth from images of specular and transparent surfaces. In Proceedings of the Computer Vision and Pattern Recognition Conference. 987-1001","author":"Ramirez Pierluigi Zama","year":"2025","unstructured":"Pierluigi Zama Ramirez, Fabio Tosi, Luigi Di Stefano, Radu Timofte, Alex Costanzino, Matteo Poggi, Samuele Salti, Stefano Mattoccia, Zhe Zhang, Yang Yang, et al., 2025. NTIRE 2025 challenge on hr depth from images of specular and transparent surfaces. In Proceedings of the Computer Vision and Pattern Recognition Conference. 987-1001."},{"key":"e_1_3_2_1_39_1","volume-title":"Conference on Robot Learning (CoRL).","author":"Rana Karmesh","year":"2023","unstructured":"Karmesh Rana, Jesse Haviland, Sourav Garg, Jad Abou-Chakra, Ian Reid, and Niko Suenderhauf. 2023. SayPlan: Grounding Large Language Models Using 3D Scene Graphs for Scalable Robot Task Planning. In Conference on Robot Learning (CoRL)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"e_1_3_2_1_41_1","volume-title":"Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer","author":"Ranftl Ren\u00e9","year":"2020","unstructured":"Ren\u00e9 Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, and Vladlen Koltun. 2020. Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer. IEEE transactions on pattern analysis and machine intelligence, Vol. 44, 3 (2020), 1623-1637."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Shreeyak Sajjan Matthew Moore Mike Pan Ganesh Nagaraja Johnny Lee Andy Zeng and Shuran Song. 2020. ClearGrasp: 3D Shape Estimation of Transparent Objects for Manipulation. (2020) 3634-3642.","DOI":"10.1109\/ICRA40945.2020.9197518"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Daniel Schober Ronja G\u00fcldenring James Love and Lazaros Nalpantidis. 2025. Vision-based robot manipulation of transparent liquid containers in a laboratory setting. (2025) 1193-1200.","DOI":"10.1109\/SII59315.2025.10870900"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00851"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06734-w"},{"key":"e_1_3_2_1_46_1","volume-title":"GRAB: A Dataset of Whole-Body Human Grasping of Objects. In European Conference on Computer Vision (ECCV). 581-600","author":"Taheri Omid","year":"2020","unstructured":"Omid Taheri, Nima Ghorbani, Michael J. Black, and Dimitrios Tzionas. 2020. GRAB: A Dataset of Whole-Body Human Grasping of Objects. In European Conference on Computer Vision (ECCV). 581-600."},{"key":"e_1_3_2_1_47_1","volume-title":"CoDance: An Unbind-Rebind Paradigm for Robust Multi-Subject Animation. arXiv preprint arXiv:2601.11096","author":"Tan Shuai","year":"2026","unstructured":"Shuai Tan, Biao Gong, Ke Ma, Yutong Feng, Qiyuan Zhang, Yan Wang, Yujun Shen, and Hengshuang Zhao. 2026. CoDance: An Unbind-Rebind Paradigm for Robust Multi-Subject Animation. arXiv preprint arXiv:2601.11096 (2026)."},{"key":"e_1_3_2_1_48_1","volume-title":"Animate-x: Universal character image animation with enhanced motion representation. arXiv preprint arXiv:2410.10306","author":"Tan Shuai","year":"2024","unstructured":"Shuai Tan, Biao Gong, Xiang Wang, Shiwei Zhang, Dandan Zheng, Ruobing Zheng, Kecheng Zheng, Jingdong Chen, and Ming Yang. 2024a. Animate-x: Universal character image animation with enhanced motion representation. arXiv preprint arXiv:2410.10306 (2024)."},{"key":"e_1_3_2_1_49_1","volume-title":"European Conference on Computer Vision. Springer, 398-416","author":"Tan Shuai","year":"2024","unstructured":"Shuai Tan, Bin Ji, Mengxiao Bi, and Ye Pan. 2024b. Edtalk: Efficient disentanglement for emotional talking head synthesis. In European Conference on Computer Vision. Springer, 398-416."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00402"},{"key":"e_1_3_2_1_51_1","volume-title":"Methods and Practices. In European Conference on Computer Vision (ECCV) (Lecture Notes in Computer Science). Springer, 198-214","author":"Wang Xiayu","year":"2024","unstructured":"Xiayu Wang, Ke Ma, Ruiyun Zhong, Xinggang Wang, Yi Fang, Yang Xiao, and Tian Xia. 2024. Towards Dual Transparent Liquid Level Estimation in Biomedical Lab: Dataset, Methods and Practices. In European Conference on Computer Vision (ECCV) (Lecture Notes in Computer Science). Springer, 198-214. LNCS."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Bowen Wen Wei Yang Jan Kautz and Stan Birchfield. 2024. FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/openaccess.thecvf.com\/content\/CVPR2024\/papers\/Wen_FoundationPose_Unified_6D_Pose_Estimation_and_Tracking_of_Novel_Objects_CVPR_2024_paper.pdf","DOI":"10.1109\/CVPR52733.2024.01692"},{"key":"e_1_3_2_1_53_1","unstructured":"Shun-Cheng Wu Antoni Rosinol Yun Chang and Luca Carlone. 2021. SceneGraphFusion: Incremental 3D Scene Graph Prediction From RGB-D Sequences. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). https:\/\/openaccess.thecvf.com\/content\/CVPR2021\/papers\/Wu_SceneGraphFusion_Incremental_3D_Scene_Graph_Prediction_From_RGB-D_Sequences_CVPR_2021_paper.pdf"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.3390\/s23156656"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-41951-x"},{"key":"e_1_3_2_1_56_1","volume-title":"AAAI Conference on Artificial Intelligence (AAAI).","author":"Xu Wenting","unstructured":"Wenting Xu, Viorela Ila, Luping Zhou, and Craig T. Jin. 2025. TB-HSU: Hierarchical 3D Scene Understanding with Contextual Affordances. In AAAI Conference on Artificial Intelligence (AAAI)."},{"key":"e_1_3_2_1_57_1","volume-title":"M 2 diffuser: Diffusion-based trajectory optimization for mobile manipulation in 3d scenes","author":"Yan Sixu","year":"2025","unstructured":"Sixu Yan, Zeyu Zhang, Muzhi Han, Zaijin Wang, Qi Xie, Zhitian Li, Zhehan Li, Hangxin Liu, Xinggang Wang, and Song-Chun Zhu. 2025. M 2 diffuser: Diffusion-based trajectory optimization for mobile manipulation in 3d scenes. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_2_1_58_1","volume-title":"Llm meets scene graph: Can large language models understand and generate scene graphs? a benchmark and empirical study. arXiv preprint arXiv:2505.19510","author":"Yang Dongil","year":"2025","unstructured":"Dongil Yang, Minjin Kim, Sunghwan Kim, Beong-woo Kwak, Minjun Park, Jinseok Hong, Woontack Woo, and Jinyoung Yeo. 2025. Llm meets scene graph: Can large language models understand and generate scene graphs? a benchmark and empirical study. arXiv preprint arXiv:2505.19510 (2025)."},{"key":"e_1_3_2_1_59_1","first-page":"21875","article-title":"Depth anything v2","volume":"37","author":"Yang Lihe","year":"2024","unstructured":"Lihe Yang, Bingyi Kang, Zilong Huang, Zhen Zhao, Xiaogang Xu, Jiashi Feng, and Hengshuang Zhao. 2024. Depth anything v2. Advances in Neural Information Processing Systems, Vol. 37 (2024), 21875-21911.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_60_1","unstructured":"Hong-Xing Yu Haoyi Duan Charles Herrmann William T. Freeman and Jiajun Wu. 2025. WonderWorld: Interactive 3D Scene Generation from a Single Image. In CVPR."},{"key":"e_1_3_2_1_61_1","volume-title":"Wonderjourney: Going from Anywhere to Everywhere. In CVPR.","author":"Yu Hong-Xing","year":"2024","unstructured":"Hong-Xing Yu, Haoyi Duan, Junhwa Hur, Kyle Sargent, Michael Rubinstein, William T. Freeman, Forrester Cole, Deqing Sun, Noah Snavely, Jiajun Wu, and Charles Herrmann. 2024. Wonderjourney: Going from Anywhere to Everywhere. In CVPR."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01807"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00718"},{"key":"e_1_3_2_1_64_1","volume-title":"Lens: Learning to segment anything with unified reinforced reasoning. arXiv preprint arXiv:2508.14153","author":"Zhu Lianghui","year":"2025","unstructured":"Lianghui Zhu, Bin Ouyang, Yuxuan Zhang, Tianheng Cheng, Rui Hu, Haocheng Shen, Longjin Ran, Xiaoxin Chen, Li Yu, Wenyu Liu, et al., 2025b. Lens: Learning to segment anything with unified reinforced reasoning. arXiv preprint arXiv:2508.14153 (2025)."}],"event":{"name":"WWW '26: The ACM Web Conference 2026","location":"Dubai United Arab Emirates","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2026"],"original-title":[],"deposited":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T16:27:23Z","timestamp":1775838443000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774904.3792591"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,12]]},"references-count":64,"alternative-id":["10.1145\/3774904.3792591","10.1145\/3774904"],"URL":"https:\/\/doi.org\/10.1145\/3774904.3792591","relation":{},"subject":[],"published":{"date-parts":[[2026,4,12]]},"assertion":[{"value":"2026-04-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}