{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T15:57:34Z","timestamp":1778947054183,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754871","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:55:00Z","timestamp":1761375300000},"page":"11239-11248","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["DepthDark: Robust Monocular Depth Estimation for Low-Light Environments"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4543-814X","authenticated-orcid":false,"given":"Longjian","family":"Zeng","sequence":"first","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6107-4538","authenticated-orcid":false,"given":"Zunjie","family":"Zhu","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8002-4688","authenticated-orcid":false,"given":"Rongfeng","family":"Lu","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6819-6490","authenticated-orcid":false,"given":"Ming","family":"Lu","sequence":"additional","affiliation":[{"name":"Intel Labs China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8788-1725","authenticated-orcid":false,"given":"Bolun","family":"Zheng","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7415-7938","authenticated-orcid":false,"given":"Chenggang","family":"Yan","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8313-8520","authenticated-orcid":false,"given":"Anke","family":"Xue","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/38.963459"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25090"},{"key":"e_1_3_2_1_3_1","volume-title":"Exploring visual prompts for adapting large-scale models. arXiv preprint arXiv:2203.17274","author":"Bahng Hyojin","year":"2022","unstructured":"Hyojin Bahng, Ali Jahanian, Swami Sankaranarayanan, and Phillip Isola. 2022. Exploring visual prompts for adapting large-scale models. arXiv preprint arXiv:2203.17274 (2022)."},{"key":"e_1_3_2_1_4_1","volume-title":"Zoedepth: Zero-shot transfer by combining relative and metric depth. arXiv preprint arXiv:2302.12288","author":"Bhat Shariq Farooq","year":"2023","unstructured":"Shariq Farooq Bhat, Reiner Birkl, Diana Wofk, Peter Wonka, and Matthias M\u00fcller. 2023. Zoedepth: Zero-shot transfer by combining relative and metric depth. arXiv preprint arXiv:2302.12288 (2023)."},{"key":"e_1_3_2_1_5_1","unstructured":"Rishi Bommasani Drew A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill et al. 2021a. On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)."},{"key":"e_1_3_2_1_6_1","unstructured":"Rishi Bommasani Drew A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill et al. 2021b. On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)."},{"key":"e_1_3_2_1_7_1","volume-title":"Virtual kitti 2. arXiv preprint arXiv:2001.10773","author":"Cabon Yohann","year":"2020","unstructured":"Yohann Cabon, Naila Murray, and Martin Humenberger. 2020. Virtual kitti 2. arXiv preprint arXiv:2001.10773 (2020)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01149"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00391"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547947"},{"key":"e_1_3_2_1_12_1","first-page":"3926","article-title":"Flare7k: A phenomenological nighttime flare removal dataset","volume":"35","author":"Dai Yuekun","year":"2022","unstructured":"Yuekun Dai, Chongyi Li, Shangchen Zhou, Ruicheng Feng, and Chen Change Loy. 2022. Flare7k: A phenomenological nighttime flare removal dataset. Advances in Neural Information Processing Systems, Vol. 35 (2022), 3926-3937.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_13_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_14_1","volume-title":"Generative models: What do they know? do they know things? let's find out! arXiv preprint arXiv:2311.17137","author":"Du Xiaodan","year":"2023","unstructured":"Xiaodan Du, Nicholas Kolkin, Greg Shakhnarovich, and Anand Bhattad. 2023. Generative models: What do they know? do they know things? let's find out! arXiv preprint arXiv:2311.17137 (2023)."},{"key":"e_1_3_2_1_15_1","volume-title":"Depth map prediction from a single image using a multi-scale deep network. Advances in neural information processing systems","author":"Eigen David","year":"2014","unstructured":"David Eigen, Christian Puhrsch, and Rob Fergus. 2014. Depth map prediction from a single image using a multi-scale deep network. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548186"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01891-x"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"e_1_3_2_1_20_1","volume-title":"Introduction to Fourier optics","author":"Goodman Joseph W","unstructured":"Joseph W Goodman. 2005. Introduction to Fourier optics. Roberts and Company publishers."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-006-0031-y"},{"key":"e_1_3_2_1_23_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01454"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3176533"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00907"},{"key":"e_1_3_2_1_27_1","volume-title":"Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer. arXiv preprint arXiv:1907.01341","author":"Lasinger Katrin","year":"2019","unstructured":"Katrin Lasinger, Ren\u00e9 Ranftl, Konrad Schindler, and Vladlen Koltun. 2019. Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer. arXiv preprint arXiv:1907.01341 (2019)."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 1119-1127","author":"Li Bo","year":"2015","unstructured":"Bo Li, Chunhua Shen, Yuchao Dai, Anton Van Den Hengel, and Mingyi He. 2015. Depth and surface normal estimation from monocular images using regression on deep features and hierarchical crfs. In Proceedings of the IEEE conference on computer vision and pattern recognition. 1119-1127."},{"key":"e_1_3_2_1_29_1","volume-title":"VGNC: Reducing the Overfitting of Sparse-view 3DGS via Validation-guided Gaussian Number Control. arXiv preprint arXiv:2504.14548","author":"Lin Lifeng","year":"2025","unstructured":"Lifeng Lin, Rongfeng Lu, Quan Chen, Haofan Ren, Ming Lu, Yaoqi Sun, Chenggang Yan, and Anke Xue. 2025. VGNC: Reducing the Overfitting of Sparse-view 3DGS via Validation-guided Gaussian Number Control. arXiv preprint arXiv:2504.14548 (2025)."},{"key":"e_1_3_2_1_30_1","volume-title":"Optical physics","author":"Lipson Ariel","unstructured":"Ariel Lipson, Stephen G Lipson, and Henry Lipson. 2010. Optical physics. Cambridge University Press."},{"key":"e_1_3_2_1_31_1","first-page":"28","volume-title":"Marseille","author":"Liu Ce","year":"2008","unstructured":"Ce Liu, Jenny Yuen, Antonio Torralba, Josef Sivic, and William T Freeman. 2008. Sift flow: Dense correspondence across different scenes. In Computer Vision-ECCV 2008: 10th European Conference on Computer Vision, Marseille, France, October 12-18, 2008, Proceedings, Part III 10. Springer, 28-42."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01250"},{"key":"e_1_3_2_1_33_1","volume-title":"Thermalgaussian: Thermal 3d gaussian splatting. arXiv preprint arXiv:2409.07200","author":"Lu Rongfeng","year":"2024","unstructured":"Rongfeng Lu, Hangyu Chen, Zunjie Zhu, Yuhang Qin, Ming Lu, Le Zhang, Chenggang Yan, and Anke Xue. 2024. Thermalgaussian: Thermal 3d gaussian splatting. arXiv preprint arXiv:2409.07200 (2024)."},{"key":"e_1_3_2_1_34_1","first-page":"1","article-title":"Self-supervised camera relocalization with hierarchical fern encoding","volume":"73","author":"Lu Rongfeng","year":"2023","unstructured":"Rongfeng Lu, Zunjie Zhu, Sheng Fu, Shenrong Chen, Tingyu Wang, Chenggang Yan, and Feng Xu. 2023. Self-supervised camera relocalization with hierarchical fern encoding. IEEE Transactions on Instrumentation and Measurement, Vol. 73 (2023), 1-12.","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"YiFan Lu Ning Xie and Heng Tao Shen. 2020. DMCR-GAN: adversarial denoising for monte carlo renderings with residual attention networks and hierarchical features modulation of auxiliary buffers. In SIGGRAPH Asia 2020 Technical Communications. 1-4.","DOI":"10.1145\/3410700.3425426"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16329"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364916679498"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Bui Tuong Phong. 1998. Illumination for computer generated pictures. In Seminal graphics: pioneering efforts that shaped the field. 95-101.","DOI":"10.1145\/280811.280980"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3129396"},{"key":"e_1_3_2_1_40_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3562939.3565620"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01073"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00729"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","unstructured":"H Touvron T Lavril G Izacard X Martinet MA Lachaux T Lacroix B Rozi\u00e8re N Goyal E Hambro F Azhar et al. 2023. Open and efficient foundation language models. Preprint at arXiv. https:\/\/doi.org\/10.48550\/arXiv Vol. 2302 (2023).","DOI":"10.48550\/arXiv"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58604-1_27"},{"key":"e_1_3_2_1_48_1","volume-title":"Conference on Robot Learning. PMLR","author":"Vankadari Madhu","year":"2023","unstructured":"Madhu Vankadari, Stuart Golodetz, Sourav Garg, Sangyun Shin, Andrew Markham, and Niki Trigoni. 2023. When the sun goes down: Repairing photometric losses for all-day depth estimation. In Conference on Robot Learning. PMLR, 1992-2003."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01575"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00864"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00283"},{"key":"e_1_3_2_1_52_1","first-page":"8520","article-title":"Physics-based noise modeling for extreme low-light photography","volume":"44","author":"Wei Kaixuan","year":"2021","unstructured":"Kaixuan Wei, Ying Fu, Yinqiang Zheng, and Jiaolong Yang. 2021. Physics-based noise modeling for extreme low-light photography. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 11 (2021), 8520-8537.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794182"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00069"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681299"},{"key":"e_1_3_2_1_56_1","volume-title":"Self-Supervised Monocular Depth Estimation in the Dark: Towards Data Distribution Compensation. arXiv preprint arXiv:2404.13854","author":"Yang Haolin","year":"2024","unstructured":"Haolin Yang, Chaoqiang Zhao, Lu Sheng, and Yang Tang. 2024c. Self-Supervised Monocular Depth Estimation in the Dark: Towards Data Distribution Compensation. arXiv preprint arXiv:2404.13854 (2024)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"e_1_3_2_1_58_1","volume-title":"Depth Anything V2. arXiv preprint arXiv:2406.09414","author":"Yang Lihe","year":"2024","unstructured":"Lihe Yang, Bingyi Kang, Zilong Huang, Zhen Zhao, Xiaogang Xu, Jiashi Feng, and Hengshuang Zhao. 2024b. Depth Anything V2. arXiv preprint arXiv:2406.09414 (2024)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01168"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00653"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00578"},{"key":"e_1_3_2_1_62_1","volume-title":"Pseudo-lidar: Accurate depth for 3d object detection in autonomous driving. arXiv. arXiv preprint arXiv:1906.06310","author":"You Y","year":"2019","unstructured":"Y You, Y Wang, WL Chao, D Garg, G Pleiss, B Hariharan, M Campbell, and KQ Weinberger. 2019. Pseudo-lidar: Accurate depth for 3d object detection in autonomous driving. arXiv. arXiv preprint arXiv:1906.06310 (2019)."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3478513.3480565"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01460"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00455"},{"key":"e_1_3_2_1_66_1","volume-title":"Proceedings of the 32nd ACM International Conference on Multimedia. 7523-7532","author":"Zhang Zhedong","unstructured":"Zhedong Zhang, Liang Li, Gaoxiang Cong, Haibing Yin, Yuhan Gao, Chenggang Yan, Anton van den Hengel, and Yuankai Qi. 2024. From speaker to dubber: movie dubbing with prosody and duration consistency learning. In Proceedings of the 32nd ACM International Conference on Multimedia. 7523-7532."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2022.3182360"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV57658.2022.00077"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3123621"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00527"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3115139"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_40"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754871","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:01:29Z","timestamp":1765339289000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754871"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":72,"alternative-id":["10.1145\/3746027.3754871","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754871","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}