{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:20:25Z","timestamp":1765340425680,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":83,"publisher":"ACM","funder":[{"name":"National Defense Science and Technology Foundation Strengthening Program Funding","award":["Grant 2023-JCJQ-JJ-0219"],"award-info":[{"award-number":["Grant 2023-JCJQ-JJ-0219"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754947","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:47:18Z","timestamp":1761374838000},"page":"3007-3016","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["PAF: Prototype Adaptive Fusion for Test-Time Adaptation of Vision-Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9718-6798","authenticated-orcid":false,"given":"Si","family":"Chen","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1533-9263","authenticated-orcid":false,"given":"Yujia","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4557-2138","authenticated-orcid":false,"given":"Xiaotian","family":"Yin","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3791-3984","authenticated-orcid":false,"given":"Xin","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9768-7861","authenticated-orcid":false,"given":"Huakai","family":"Lai","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1856-9564","authenticated-orcid":false,"given":"Tianzhu","family":"Zhang","sequence":"additional","affiliation":[{"name":"National Key Laboratory of Deep Space Exploration, University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"446","volume-title":"Switzerland","author":"Bossard Lukas","year":"2014","unstructured":"Lukas Bossard, Matthieu Guillaumin, and Luc Van Gool. 2014. Food-101-mining discriminative components with random forests. In Computer vision-ECCV 2014: 13th European conference, zurich, Switzerland, September 6-12, 2014, proceedings, part VI 13. Springer, 446-461."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00816"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00039"},{"key":"e_1_3_2_1_4_1","volume-title":"Clip is also a good teacher: A new learning framework for inductive zero-shot semantic segmentation. arXiv preprint arXiv:2310.02296","author":"Chen Jialei","year":"2023","unstructured":"Jialei Chen, Daisuke Deguchi, Chenkai Zhang, Xu Zheng, and Hiroshi Murase. 2023. Clip is also a good teacher: A new learning framework for inductive zero-shot semantic segmentation. arXiv preprint arXiv:2310.02296 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"SAM-Glomeruli: Enhanced Segment Anything Model for Precise Glomeruli Segmentation. In International Workshop on Medical Optical Imaging and Virtual Microscopy Image Analysis. Springer, 182-191","author":"Chen Yujia","year":"2024","unstructured":"Yujia Chen, Wangkai Li, Zhaoyang Li, Rui Sun, Tianzhu Zhang, Zhiwei Xiong, and Feng Wu. 2024. SAM-Glomeruli: Enhanced Segment Anything Model for Precise Glomeruli Segmentation. In International Workshop on Medical Optical Imaging and Virtual Microscopy Image Analysis. Springer, 182-191."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i2.32234"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","unstructured":"Zhixin Cheng Jiacheng Deng Xinjun Li Baoqun Yin and Tianzhu Zhang. 2025a. B2-3D: Uncertainty-aware Hierarchical Registration Network with Domain Alignment. TechRxiv. https:\/\/doi.org\/10.36227\/techrxiv.174438606.68967828.v1 Preprint.","DOI":"10.36227\/techrxiv.174438606.68967828.v1"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i3.32251"},{"key":"e_1_3_2_1_9_1","volume-title":"CA-I2P: Channel-Adaptive Registration Network with Global Optimal Selection. arXiv preprint arXiv:2506.21364","author":"Cheng Zhixin","year":"2025","unstructured":"Zhixin Cheng, Jiacheng Deng, Xinjun Li, Xiaotian Yin, Bohao Liao, Baoqun Yin, Wenfei Yang, and Tianzhu Zhang. 2025c. CA-I2P: Channel-Adaptive Registration Network with Global Optimal Selection. arXiv preprint arXiv:2506.21364 (2025). arXiv:2506.21364 [cs.CV]"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.461"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_12_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_13_1","volume-title":"Frustratingly easy test-time adaptation of vision-language models. arXiv preprint arXiv:2405.18330","author":"Farina Matteo","year":"2024","unstructured":"Matteo Farina, Gianni Franchi, Giovanni Iacca, Massimiliano Mancini, and Elisa Ricci. 2024. Frustratingly easy test-time adaptation of vision-language models. arXiv preprint arXiv:2405.18330 (2024)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2004.383"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00255"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01891-x"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00217"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2019.2918242"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"e_1_3_2_1_21_1","volume-title":"Using self-supervised learning can improve model robustness and uncertainty. Advances in neural information processing systems","author":"Hendrycks Dan","year":"2019","unstructured":"Dan Hendrycks, Mantas Mazeika, Saurav Kadavath, and Dawn Song. 2019a. Using self-supervised learning can improve model robustness and uncertainty. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_22_1","volume-title":"Augmix: A simple data processing method to improve robustness and uncertainty. arXiv preprint arXiv:1912.02781","author":"Hendrycks Dan","year":"2019","unstructured":"Dan Hendrycks, Norman Mu, Ekin D Cubuk, Barret Zoph, Justin Gilmer, and Balaji Lakshminarayanan. 2019b. Augmix: A simple data processing method to improve robustness and uncertainty. arXiv preprint arXiv:1912.02781 (2019)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01501"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00550"},{"key":"e_1_3_2_1_25_1","volume-title":"International conference on machine learning. PMLR, 4904-4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904-4916."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01343"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01394"},{"key":"e_1_3_2_1_29_1","volume-title":"Clip can understand depth. arXiv preprint arXiv:2402.03251","author":"Kim Dunam","year":"2024","unstructured":"Dunam Kim and Seokju Lee. 2024. Clip can understand depth. arXiv preprint arXiv:2402.03251 (2024)."},{"key":"e_1_3_2_1_30_1","volume-title":"Benoit R Cottereau, Liangjun Zhang, Hesheng Wang, Wei Tsang Ooi, Ruijie Zhu, et al.","author":"Kong Lingdong","year":"2023","unstructured":"Lingdong Kong, Yaru Niu, Shaoyuan Xie, Hanjiang Hu, Lai Xing Ng, Benoit R Cottereau, Liangjun Zhang, Hesheng Wang, Wei Tsang Ooi, Ruijie Zhu, et al., 2023. The robodepth challenge: Methods and advancements towards robust depth estimation. arXiv preprint arXiv:2307.15061 (2023)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00823"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i5.32540"},{"key":"e_1_3_2_1_34_1","volume-title":"SAS: Segment Any 3D Scene with Integrated 2D Priors. arXiv preprint arXiv:2503.08512","author":"Li Zhuoyuan","year":"2025","unstructured":"Zhuoyuan Li, Jiahao Lu, Jiacheng Deng, Hanzhi Chang, Lifan Wu, Yanzhe Liang, and Tianzhu Zhang. 2025b. SAS: Segment Any 3D Scene with Integrated 2D Priors. arXiv preprint arXiv:2503.08512 (2025)."},{"key":"e_1_3_2_1_35_1","volume-title":"Plane2depth: Hierarchical adaptive plane guidance for monocular depth estimation","author":"Liu Li","year":"2024","unstructured":"Li Liu, Ruijie Zhu, Jiacheng Deng, Ziyang Song, Wenfei Yang, and Tianzhu Zhang. 2024c. Plane2depth: Hierarchical adaptive plane guidance for monocular depth estimation. IEEE Transactions on Circuits and Systems for Video Technology (2024)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3424566"},{"key":"e_1_3_2_1_37_1","volume-title":"Self-supervised learning: Generative or contrastive","author":"Liu Xiao","year":"2021","unstructured":"Xiao Liu, Fanjin Zhang, Zhenyu Hou, Li Mian, Zhaoyu Wang, Jing Zhang, and Jie Tang. 2021b. Self-supervised learning: Generative or contrastive. IEEE transactions on knowledge and data engineering, Vol. 35, 1 (2021), 857-876."},{"key":"e_1_3_2_1_38_1","first-page":"21808","article-title":"Ttt: When does self-supervised test-time training fail or thrive","volume":"34","author":"Liu Yuejiang","year":"2021","unstructured":"Yuejiang Liu, Parth Kothari, Bastien Van Delft, Baptiste Bellot-Gurlet, Taylor Mordan, and Alexandre Alahi. 2021a. Ttt: When does self-supervised test-time training fail or thrive? Advances in Neural Information Processing Systems, Vol. 34 (2021), 21808-21820.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i13.29320"},{"key":"e_1_3_2_1_40_1","first-page":"84114","article-title":"Dn-4dgs: Denoised deformable network with temporal-spatial aggregation for dynamic scene rendering","volume":"37","author":"Lu Jiahao","year":"2024","unstructured":"Jiahao Lu, Jiacheng Deng, Ruijie Zhu, Yanzhe Liang, Wenfei Yang, Xu Zhou, and Tianzhu Zhang. 2024. Dn-4dgs: Denoised deformable network with temporal-spatial aggregation for dynamic scene rendering. Advances in Neural Information Processing Systems, Vol. 37 (2024), 84114-84138.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_41_1","volume-title":"Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151","author":"Maji Subhransu","year":"2013","unstructured":"Subhransu Maji, Esa Rahtu, Juho Kannala, Matthew Blaschko, and Andrea Vedaldi. 2013. Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151 (2013)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"e_1_3_2_1_44_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_45_1","volume-title":"International conference on machine learning. PMLR, 5389-5400","author":"Recht Benjamin","year":"2019","unstructured":"Benjamin Recht, Rebecca Roelofs, Ludwig Schmidt, and Vaishaal Shankar. 2019. Do imagenet classifiers generalize to imagenet?. In International conference on machine learning. PMLR, 5389-5400."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_47_1","volume-title":"Consistency-guided prompt learning for vision-language models. arXiv preprint arXiv:2306.01195","author":"Roy Shuvendu","year":"2023","unstructured":"Shuvendu Roy and Ali Etemad. 2023. Consistency-guided prompt learning for vision-language models. arXiv preprint arXiv:2306.01195 (2023)."},{"key":"e_1_3_2_1_48_1","first-page":"14274","article-title":"Test-time prompt tuning for zero-shot generalization in vision-language models","volume":"35","author":"Shu Manli","year":"2022","unstructured":"Manli Shu, Weili Nie, De-An Huang, Zhiding Yu, Tom Goldstein, Anima Anandkumar, and Chaowei Xiao. 2022. Test-time prompt tuning for zero-shot generalization in vision-language models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 14274-14289.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_49_1","volume-title":"Depthmaster: Taming diffusion models for monocular depth estimation. arXiv preprint arXiv:2501.02576","author":"Song Ziyang","year":"2025","unstructured":"Ziyang Song, Zerong Wang, Bo Li, Hao Zhang, Ruijie Zhu, Li Liu, Peng-Tao Jiang, and Tianzhu Zhang. 2025. Depthmaster: Taming diffusion models for monocular depth estimation. arXiv preprint arXiv:2501.02576 (2025)."},{"key":"e_1_3_2_1_50_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro, Amir Roshan Zamir, and Mubarak Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)."},{"key":"e_1_3_2_1_51_1","volume-title":"International conference on machine learning. PMLR, 9229-9248","author":"Sun Yu","year":"2020","unstructured":"Yu Sun, Xiaolong Wang, Zhuang Liu, John Miller, Alexei Efros, and Moritz Hardt. 2020. Test-time training with self-supervision for generalization under distribution shifts. In International conference on machine learning. PMLR, 9229-9248."},{"key":"e_1_3_2_1_52_1","volume-title":"Tent: Fully test-time adaptation by entropy minimization. arXiv preprint arXiv:2006.10726","author":"Wang Dequan","year":"2020","unstructured":"Dequan Wang, Evan Shelhamer, Shaoteng Liu, Bruno Olshausen, and Trevor Darrell. 2020. Tent: Fully test-time adaptation by entropy minimization. arXiv preprint arXiv:2006.10726 (2020)."},{"key":"e_1_3_2_1_53_1","volume-title":"Learning robust global representations by penalizing local predictive power. Advances in neural information processing systems","author":"Wang Haohan","year":"2019","unstructured":"Haohan Wang, Songwei Ge, Zachary Lipton, and Eric P Xing. 2019a. Learning robust global representations by penalizing local predictive power. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00929"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612490"},{"key":"e_1_3_2_1_56_1","volume-title":"Beyond Any-Shot Adaptation: Predicting Optimization Outcome for Robustness Gains without Extra Pay. arXiv preprint arXiv:2501.11039","author":"Wang Qi Cheems","year":"2025","unstructured":"Qi Cheems Wang, Zehao Xiao, Yixiu Mao, Yun Qu, Jiayi Shen, Yiqin Lv, and Xiangyang Ji. 2025. Beyond Any-Shot Adaptation: Predicting Optimization Outcome for Robustness Gains without Extra Pay. arXiv preprint arXiv:2501.11039 (2025)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612107"},{"key":"e_1_3_2_1_58_1","first-page":"3485","article-title":"Sun database: Large-scale scene recognition from abbey to zoo. In 2010 IEEE computer society conference on computer vision and pattern recognition","author":"Xiao Jianxiong","year":"2010","unstructured":"Jianxiong Xiao, James Hays, Krista A Ehinger, Aude Oliva, and Antonio Torralba. 2010. Sun database: Large-scale scene recognition from abbey to zoo. In 2010 IEEE computer society conference on computer vision and pattern recognition. IEEE, 3485-3492.","journal-title":"IEEE"},{"key":"e_1_3_2_1_59_1","volume-title":"DynaPrompt: Dynamic Test-Time Prompt Tuning. arXiv preprint arXiv:2501.16404","author":"Xiao Zehao","year":"2025","unstructured":"Zehao Xiao, Shilin Yan, Jack Hong, Jiayin Cai, Xiaolong Jiang, Yao Hu, Jiayi Shen, Qi Wang, and Cees GM Snoek. 2025. DynaPrompt: Dynamic Test-Time Prompt Tuning. arXiv preprint arXiv:2501.16404 (2025)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01571"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3327924"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01522"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00653"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02212"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1049\/cje.2021.00.455"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i21.34372"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3432753"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01049"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00927"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00156"},{"key":"e_1_3_2_1_71_1","first-page":"32111","article-title":"Dual prototype evolving for test-time generalization of vision-language models","volume":"37","author":"Zhang Ce","year":"2024","unstructured":"Ce Zhang, Simon Stepputtis, Katia Sycara, and Yaqi Xie. 2024b. Dual prototype evolving for test-time generalization of vision-language models. Advances in Neural Information Processing Systems, Vol. 37 (2024), 32111-32136.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_72_1","volume-title":"Tip-adapter: Training-free clip-adapter for better vision-language modeling. arXiv preprint arXiv:2111.03930","author":"Zhang Renrui","year":"2021","unstructured":"Renrui Zhang, Rongyao Fang, Wei Zhang, Peng Gao, Kunchang Li, Jifeng Dai, Yu Qiao, and Hongsheng Li. 2021. Tip-adapter: Training-free clip-adapter for better vision-language modeling. arXiv preprint arXiv:2111.03930 (2021)."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.23919\/cje.2022.00.396"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01460"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02713"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00298"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3387116"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01075"},{"key":"e_1_3_2_1_81_1","first-page":"101790","article-title":"Motiongs: Exploring explicit motion guidance for deformable 3d gaussian splatting","volume":"37","author":"Zhu Ruijie","year":"2024","unstructured":"Ruijie Zhu, Yanzhe Liang, Hanzhi Chang, Jiacheng Deng, Jiahao Lu, Wenfei Yang, Tianzhu Zhang, and Yongdong Zhang. 2024a. Motiongs: Exploring explicit motion guidance for deformable 3d gaussian splatting. Advances in Neural Information Processing Systems, Vol. 37 (2024), 101790-101817.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3335316"},{"key":"e_1_3_2_1_83_1","volume-title":"Scaledepth: Decomposing metric depth estimation into scale prediction and relative depth estimation. arXiv preprint arXiv:2407.08187","author":"Zhu Ruijie","year":"2024","unstructured":"Ruijie Zhu, Chuxin Wang, Ziyang Song, Li Liu, Tianzhu Zhang, and Yongdong Zhang. 2024b. Scaledepth: Decomposing metric depth estimation into scale prediction and relative depth estimation. arXiv preprint arXiv:2407.08187 (2024)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754947","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:17:02Z","timestamp":1765340222000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754947"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":83,"alternative-id":["10.1145\/3746027.3754947","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754947","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}