{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T10:28:12Z","timestamp":1752229692593,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Qingdao Postdoctoral Applied Foundation","award":["QDBSH20240102029"],"award-info":[{"award-number":["QDBSH20240102029"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100018546","name":"Postdoctoral Innovation Project of Shandong Province","doi-asserted-by":"publisher","award":["SDCX-ZG-202400312"],"award-info":[{"award-number":["SDCX-ZG-202400312"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100018546","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681505","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"8903-8912","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["LiteGfm: A Lightweight Self-supervised Monocular Depth Estimation Framework for Artifacts Reduction via Guided Image Filtering"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3331-1848","authenticated-orcid":false,"given":"Zhilin","family":"He","sequence":"first","affiliation":[{"name":"Qingdao University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4630-0197","authenticated-orcid":false,"given":"Yawei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Qingdao University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9214-9706","authenticated-orcid":false,"given":"Jingchang","family":"Mu","sequence":"additional","affiliation":[{"name":"Qingdao University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9341-2077","authenticated-orcid":false,"given":"Xiaoyue","family":"Gu","sequence":"additional","affiliation":[{"name":"Qingdao University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8635-8577","authenticated-orcid":false,"given":"Tianhao","family":"Gu","sequence":"additional","affiliation":[{"name":"Qingdao University, Qingdao, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Detail-Preserving Self-Supervised Monocular Depth with Self-Supervised Structural Sharpening. IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","author":"Gonzalez Bello Juan Luis","year":"2023","unstructured":"Juan Luis Gonzalez Bello, Jaeho Moon, and Munchurl Kim. 2023. Detail-Preserving Self-Supervised Monocular Depth with Self-Supervised Structural Sharpening. IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (2023), 254--264. https:\/\/api.semanticscholar.org\/CorpusID:260912119"},{"key":"e_1_3_2_1_2_1","volume-title":"X-distill: Improving Self-supervised Monocular Depth via Cross-task Distillation. arXiv preprint arXiv:2110.12516","author":"Cai Hong","year":"2021","unstructured":"Hong Cai, Janarbek Matai, Shubhankar Borse, Yizhe Zhang, Amin Ansari, and Fatih Porikli. 2021. X-distill: Improving Self-supervised Monocular Depth via Cross-task Distillation. arXiv preprint arXiv:2110.12516 (2021)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1983.4767341"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_6_1","volume-title":"Words: Transformers for Image Recognition at Scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, et al. 2020. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.304"},{"key":"e_1_3_2_1_8_1","volume-title":"Depth Map Prediction from a Single Image Using a Multi-scale Deep Network. Advances in neural information processing systems","author":"Eigen David","year":"2014","unstructured":"David Eigen, Christian Puhrsch, and Rob Fergus. 2014. Depth Map Prediction from a Single Image Using a Multi-scale Deep Network. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3330054"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913491297"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.213"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00140"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00197"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_42"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01241"},{"key":"e_1_3_2_1_18_1","volume-title":"Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data. In International Conference on Machine Learning","volume":"1","author":"Lafferty John","year":"2001","unstructured":"John Lafferty, Andrew McCallum, Fernando Pereira, et al. 2001. Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data. In International Conference on Machine Learning, Vol. 1. Williamstown, MA, 3."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2890623"},{"key":"e_1_3_2_1_20_1","volume-title":"Test-Time Domain Adaptation for Monocular Depth Estimation. In IEEE International Conference on Robotics and Automation. IEEE, 4873--4879","author":"Li Zhi","year":"2023","unstructured":"Zhi Li, Shaoshuai Shi, Bernt Schiele, and Dengxin Dai. 2023. Test-Time Domain Adaptation for Monocular Depth Estimation. In IEEE International Conference on Robotics and Automation. IEEE, 4873--4879."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00557"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01124"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3275584"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.1999.790410"},{"key":"e_1_3_2_1_25_1","volume-title":"Swiftdepth: An Efficient Hybrid CNN-Transformer Model for Self-supervised Monocular Depth Estimation on Mobile Devices","author":"Luginov Albert","year":"2023","unstructured":"Albert Luginov and Ilya Makarov. 2023. Swiftdepth: An Efficient Hybrid CNN-Transformer Model for Self-supervised Monocular Depth Estimation on Mobile Devices. In IEEE International Symposium on Mixed and Augmented Reality Adjunct. IEEE, 642--647."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2930258"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16329"},{"key":"e_1_3_2_1_28_1","volume-title":"Mobilevit: Light-Weight, General-Purpose, and Mobile-Friendly Vision Transformer. arXiv preprint arXiv:2110.02178","author":"Mehta Sachin","year":"2021","unstructured":"Sachin Mehta and Mohammad Rastegari. 2021. Mobilevit: Light-Weight, General-Purpose, and Mobile-Friendly Vision Transformer. arXiv preprint arXiv:2110.02178 (2021)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00329"},{"key":"e_1_3_2_1_30_1","unstructured":"Yallamandaiah S. and Purnachand N. 2021. An Effective Face Recognition Method Using Guided Image Filter and Convolutional Neural Network. Indonesian Journal of Electrical Engineering and Computer Science (2021). https:\/\/api.semanticscholar.org\/CorpusID:239042820"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.132"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00838"},{"key":"e_1_3_2_1_33_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2940948"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00216"},{"key":"e_1_3_2_1_36_1","volume-title":"Image Quality Assessment: from Error Visibility to Structural Similarity","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, Alan C Bovik, Hamid R Sheikh, and Eero P Simoncelli. 2004. Image Quality Assessment: from Error Visibility to Structural Similarity. IEEE transactions on image processing, Vol. 13, 4 (2004), 600--612."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.3390\/rs10010144"},{"key":"e_1_3_2_1_38_1","volume-title":"Yadav and Kishor Prabhakar Sarawadekar","author":"Kr Sumit","year":"2020","unstructured":"Sumit Kr. Yadav and Kishor Prabhakar Sarawadekar. 2020. Steering Kernel-Based Guided Image Filter for Single Image Dehazing. IEEE REGION 10 CONFERENCE (TENCON) (2020), 444--449. https:\/\/api.semanticscholar.org\/CorpusID:229374214"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00136"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00212"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00212"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.75"},{"key":"e_1_3_2_1_43_1","first-page":"1","article-title":"Stereo Matching by Training a Convolutional Neural Network to Compare Image Patches","volume":"17","author":"Zbontar Jure","year":"2016","unstructured":"Jure vZbontar and Yann LeCun. 2016. Stereo Matching by Training a Convolutional Neural Network to Compare Image Patches. Journal of Machine Learning Research, Vol. 17, 65 (2016), 1--32.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01778"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2019.10.024"},{"key":"e_1_3_2_1_46_1","volume-title":"Attention Guided Network for Retinal Image Segmentation. In International Conference on Medical Image Computing and Computer-assisted Intervention. https:\/\/api.semanticscholar.org\/CorpusID:198986024","author":"Zhang S.","year":"2019","unstructured":"S. Zhang, H. Fu, Yan Yuguang, Zhang Yubing, Wu Qingyao, Yang Ming, Tan Mingkui, and Xu Yanwu. 2019. Attention Guided Network for Retinal Image Segmentation. In International Conference on Medical Image Computing and Computer-assisted Intervention. https:\/\/api.semanticscholar.org\/CorpusID:198986024"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3160399"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3253472"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3253472"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.174"},{"key":"e_1_3_2_1_51_1","volume-title":"Self-supervised Monocular Depth Estimation with Internal Feature Fusion. arXiv preprint arXiv:2110.09482","author":"Zhou Hang","year":"2021","unstructured":"Hang Zhou, David Greenwood, and Sarah Taylor. 2021. Self-supervised Monocular Depth Estimation with Internal Feature Fusion. arXiv preprint arXiv:2110.09482 (2021)."},{"key":"e_1_3_2_1_52_1","volume-title":"Self-Supervised Monocular Depth Estimation with Internal Feature Fusion. In British Machine Vision Conference. https:\/\/api.semanticscholar.org\/CorpusID:239015886","author":"Zhou Hang","year":"2021","unstructured":"Hang Zhou, David Greenwood, and Sarah Taylor. 2021. Self-Supervised Monocular Depth Estimation with Internal Feature Fusion. In British Machine Vision Conference. https:\/\/api.semanticscholar.org\/CorpusID:239015886"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.700"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01254"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681505","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681505","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:48Z","timestamp":1750294668000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681505"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":54,"alternative-id":["10.1145\/3664647.3681505","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681505","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}