{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:21:15Z","timestamp":1771698075771,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172032"],"award-info":[{"award-number":["62172032"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681168","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"4129-4137","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Digging into Contrastive Learning for Robust Depth Estimation with Diffusion Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0895-4835","authenticated-orcid":false,"given":"Jiyuan","family":"Wang","sequence":"first","affiliation":[{"name":"Beijing Jiaotong University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2847-0349","authenticated-orcid":false,"given":"Chunyu","family":"Lin","sequence":"additional","affiliation":[{"name":"Beijing Jiaotong University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7776-889X","authenticated-orcid":false,"given":"Lang","family":"Nie","sequence":"additional","affiliation":[{"name":"Beijing Jiaotong University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9429-1096","authenticated-orcid":false,"given":"Kang","family":"Liao","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8057-1599","authenticated-orcid":false,"given":"Shuwei","family":"Shao","sequence":"additional","affiliation":[{"name":"Beihang University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8581-9554","authenticated-orcid":false,"given":"Yao","family":"Zhao","sequence":"additional","affiliation":[{"name":"Beijing Jiaotong University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01170"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","unstructured":"Nicholas Charron Stephen Phillips and Steven Waslander. 2018. De-noising of Lidar Point Clouds Corrupted by Snowfall. 254--261. https:\/\/doi.org\/10.1109\/CRV.2018.00043","DOI":"10.1109\/CRV.2018.00043"},{"key":"e_1_3_2_1_3_1","volume-title":"Diffusiondet: Diffusion model for object detection. arXiv preprint arXiv:2211.09788","author":"Chen Shoufa","year":"2022","unstructured":"Shoufa Chen, Peize Sun, Yibing Song, and Ping Luo. 2022. Diffusiondet: Diffusion model for object detection. arXiv preprint arXiv:2211.09788 (2022)."},{"key":"e_1_3_2_1_4_1","volume-title":"Exploring Simple Siamese Representation Learning. arxiv","author":"Chen Xinlei","year":"2011","unstructured":"Xinlei Chen and Kaiming He. 2020. Exploring Simple Siamese Representation Learning. arxiv: 2011.10566 [cs.CV]"},{"key":"e_1_3_2_1_5_1","volume-title":"Diffusiondepth: Diffusion denoising approach for monocular depth estimation. arXiv preprint arXiv:2303.05021","author":"Duan Yiqun","year":"2023","unstructured":"Yiqun Duan, Xianda Guo, and Zheng Zhu. 2023. Diffusiondepth: Diffusion denoising approach for monocular depth estimation. arXiv preprint arXiv:2303.05021 (2023)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.304"},{"key":"e_1_3_2_1_7_1","volume-title":"Depth map prediction from a single image using a multi-scale deep network. Advances in neural information processing systems","author":"Eigen David","year":"2014","unstructured":"David Eigen, Christian Puhrsch, and Rob Fergus. 2014. Depth map prediction from a single image using a multi-scale deep network. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2018.00214"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00751"},{"key":"e_1_3_2_1_10_1","volume-title":"Michael Firman, and Gabriel Brostow.","author":"Godard Cl\u00e9ment","year":"2019","unstructured":"Cl\u00e9ment Godard, Oisin Mac Aodha, Michael Firman, and Gabriel Brostow. 2019. Digging Into Self-Supervised Monocular Depth Estimation. arxiv: 1806.01260 [cs.CV]"},{"key":"e_1_3_2_1_11_1","volume-title":"Denoising Diffusion Probabilistic Models. arxiv","author":"Ho Jonathan","year":"2006","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. arxiv: 2006.11239 [cs.LG]"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.01987"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Tobias Kalb and J\u00fcrgen Beyerer. 2023. Principles of Forgetting in Domain-Incremental Semantic Segmentation in Adverse Weather Conditions. arxiv: 2303.14115 [cs.CV]","DOI":"10.1109\/CVPR52729.2023.01869"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00907"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Neehar Kondapaneni Markus Marks Manuel Knott Rog\u00e9rio Guimar\u00e3es and Pietro Perona. 2023. Text-image Alignment for Diffusion-based Perception.","DOI":"10.1109\/CVPR52733.2024.01317"},{"key":"e_1_3_2_1_16_1","volume-title":"Benoit R. Cottereau, and Wei Tsang Ooi.","author":"Kong Lingdong","year":"2023","unstructured":"Lingdong Kong, Shaoyuan Xie, Hanjiang Hu, Lai Xing Ng, Benoit R. Cottereau, and Wei Tsang Ooi. 2023. RoboDepth: Robust Out-of-Distribution Depth Estimation under Corruptions. ArXiv, Vol. abs\/2310.15171 (2023). https:\/\/api.semanticscholar.org\/CorpusID:264436593"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Iro Laina Christian Rupprecht Vasileios Belagiannis Federico Tombari and Nassir Navab. 2016. Deeper Depth Prediction with Fully Convolutional Residual Networks. arxiv: 1606.00373 [cs.CV]","DOI":"10.1109\/3DV.2016.32"},{"key":"e_1_3_2_1_18_1","volume-title":"DifFlow3D: Toward Robust Uncertainty-Aware Scene Flow Estimation with Diffusion Model. arXiv preprint arXiv:2311.17456","author":"Liu Jiuming","year":"2023","unstructured":"Jiuming Liu, Guangming Wang, Weicai Ye, Chaokang Jiang, Jinru Han, Zhe Liu, Guofeng Zhang, Dalong Du, and Hesheng Wang. 2023. DifFlow3D: Toward Robust Uncertainty-Aware Scene Flow Estimation with Diffusion Model. arXiv preprint arXiv:2311.17456 (2023)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Lina Liu Xibin Song Mengmeng Wang Yong Liu and Liangjun Zhang. 2021. Self-supervised Monocular Depth Estimation for All Day Images using Domain Separation. arxiv: 2108.07628 [cs.CV]","DOI":"10.1109\/ICCV48922.2021.01250"},{"key":"e_1_3_2_1_20_1","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. arxiv: 1711.05101 [cs.LG]"},{"key":"e_1_3_2_1_21_1","unstructured":"Yifan Mao Jian Liu and Xianming Liu. 2024. Stealing Stable Diffusion Prior for Robust Monocular Depth Estimation. arxiv: 2403.05056 [cs.CV]"},{"key":"e_1_3_2_1_22_1","unstructured":"Jisu Nam Gyuseong Lee Sunwoo Kim Hyeonsu Kim Hyoungwon Cho Seyeon Kim and Seungryong Kim. 2023. DiffMatch: Diffusion Model for Dense Matching. arxiv: 2305.19094 [cs.CV]"},{"key":"e_1_3_2_1_23_1","volume-title":"ECoDepth: Effective Conditioning of Diffusion Models for Monocular Depth Estimation. arXiv preprint arXiv:2403.18807","author":"Patni Suraj","year":"2024","unstructured":"Suraj Patni, Aradhye Agarwal, and Chetan Arora. 2024. ECoDepth: Effective Conditioning of Diffusion Models for Monocular Depth Estimation. arXiv preprint arXiv:2403.18807 (2024)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"e_1_3_2_1_25_1","volume-title":"Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer. arxiv","author":"Ranftl Ren\u00e9","year":"1907","unstructured":"Ren\u00e9 Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, and Vladlen Koltun. 2020. Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer. arxiv: 1907.01341 [cs.CV]"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Kieran Saunders George Vogiatzis and Luis Manso. 2023. Self-supervised Monocular Depth Estimation: Let's Talk About The Weather. arxiv: 2307.08357 [cs.CV]","DOI":"10.1109\/ICCV51070.2023.00818"},{"key":"e_1_3_2_1_27_1","volume":"202","author":"Saxena Saurabh","unstructured":"Saurabh Saxena, Charles Herrmann, Junhwa Hur, Abhishek Kar, Mohammad Norouzi, Deqing Sun, and David J. Fleet. 2023. The Surprising Effectiveness of Diffusion Models for Optical Flow and Monocular Depth Estimation. arxiv: 2306.01923 [cs.CV]","journal-title":"David J. Fleet."},{"key":"e_1_3_2_1_28_1","unstructured":"Shuwei Shao Zhongcai Pei Weihai Chen Dingchi Sun Peter C. Y. Chen and Zhengguo Li. 2023. MonoDiffusion: Self-Supervised Monocular Depth Estimation Using Diffusion Model. arxiv: 2311.07198 [cs.CV]"},{"key":"e_1_3_2_1_29_1","volume-title":"Denoising Diffusion Implicit Models. arxiv","author":"Song Jiaming","year":"2010","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising Diffusion Implicit Models. arxiv: 2010.02502 [cs.LG]"},{"key":"e_1_3_2_1_30_1","unstructured":"Ziyang Song Ruijie Zhu Chuxin Wang Jiacheng Deng Jianfeng He and Tianzhu Zhang. 2023. EC-Depth: Exploring the consistency of self-supervised monocular depth estimation in challenging scenes. arxiv: 2310.08044 [cs.CV]"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Jiyuan Wang Chunyu Lin Lang Nie Shujun Huang Yao Zhao Xing Pan and Rui Ai. 2024. WeatherDepth: Curriculum Contrastive Learning for Self-Supervised Depth Estimation under Adverse Weather Conditions. arxiv: 2310.05556 [cs.CV]","DOI":"10.1109\/ICRA57147.2024.10611100"},{"key":"e_1_3_2_1_32_1","volume-title":"Jiajun Liu, Yao Zhao, and Yunchao Wei.","author":"Wang Mengyu","year":"2023","unstructured":"Mengyu Wang, Henghui Ding, Jun Hao Liew, Jiajun Liu, Yao Zhao, and Yunchao Wei. 2023. SegRefiner: Towards Model-Agnostic Segmentation Refinement with Discrete Diffusion Process. In NeurIPS."},{"key":"e_1_3_2_1_33_1","volume-title":"DrivingStereo: A Large-Scale Dataset for Stereo Matching in Autonomous Driving Scenarios. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Yang Guorun","year":"2019","unstructured":"Guorun Yang, Xiao Song, Chaoqin Huang, Zhidong Deng, Jianping Shi, and Bolei Zhou. 2019. DrivingStereo: A Large-Scale Dataset for Stereo Matching in Autonomous Driving Scenarios. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01778"},{"key":"e_1_3_2_1_35_1","unstructured":"Chaoqiang Zhao Yang Tang and Qiyu Sun. 2022. Unsupervised Monocular Depth Estimation in Highly Complex Environments. arxiv: 2107.13137 [cs.CV]"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV57658.2022.00077"},{"key":"e_1_3_2_1_37_1","volume-title":"Unleashing Text-to-Image Diffusion Models for Visual Perception. ICCV","author":"Zhao Wenliang","year":"2023","unstructured":"Wenliang Zhao, Yongming Rao, Zuyan Liu, Benlin Liu, Jie Zhou, and Jiwen Lu. 2023. Unleashing Text-to-Image Diffusion Models for Visual Perception. ICCV (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"Lowe","author":"Zhou Tinghui","year":"2017","unstructured":"Tinghui Zhou, Matthew Brown, Noah Snavely, and David G. Lowe. 2017. Unsupervised Learning of Depth and Ego-Motion from Video. arxiv: 1704.07813 [cs.CV]"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681168","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681168","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681168"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":38,"alternative-id":["10.1145\/3664647.3681168","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681168","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}