{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T10:52:45Z","timestamp":1762253565584,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":76,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,10,12]],"date-time":"2020-10-12T00:00:00Z","timestamp":1602460800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012659","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61901384, 61871328"],"award-info":[{"award-number":["61901384, 61871328"]}],"id":[{"id":"10.13039\/501100012659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,10,12]]},"DOI":"10.1145\/3394171.3413706","type":"proceedings-article","created":{"date-parts":[[2020,10,12]],"date-time":"2020-10-12T12:26:18Z","timestamp":1602505578000},"page":"3108-3117","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["Enhancing Self-supervised Monocular Depth Estimation via Incorporating Robust Constraints"],"prefix":"10.1145","author":[{"given":"Rui","family":"Li","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"given":"Xiantuo","family":"He","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"given":"Yu","family":"Zhu","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"given":"Xianjun","family":"Li","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"given":"Jinqiu","family":"Sun","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"given":"Yanning","family":"Zhang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]}],"member":"320","published-online":{"date-parts":[[2020,10,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2001269.2001293"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2019.00054"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1162\/pres.1997.6.4.355"},{"key":"e_1_3_2_2_4_1","unstructured":"Jiawang Bian Zhichao Li Naiyan Wang Huangying Zhan Chunhua Shen Ming-Ming Cheng and Ian Reid. 2019. Unsupervised scale-consistent depth and ego-motion learning from monocular video. In Advances in Neural Information Processing Systems. 35--45.  Jiawang Bian Zhichao Li Naiyan Wang Huangying Zhan Chunhua Shen Ming-Ming Cheng and Ian Reid. 2019. Unsupervised scale-consistent depth and ego-motion learning from monocular video. In Advances in Neural Information Processing Systems. 35--45."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Julie Carmigniani Borko Furht Marco Anisetti Paolo Ceravolo Ernesto Damiani and Misa Ivkovic. 2011. Augmented reality technologies systems and applications. Multimedia tools and applications Vol. 51 1 (2011) 341--377.  Julie Carmigniani Borko Furht Marco Anisetti Paolo Ceravolo Ernesto Damiani and Misa Ivkovic. 2011. Augmented reality technologies systems and applications. Multimedia tools and applications Vol. 51 1 (2011) 341--377.","DOI":"10.1007\/s11042-010-0660-6"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00716"},{"volume-title":"CSPN: Learning Context and Resource Aware Convolutional Spatial Propagation Networks for Depth Completion. arXiv preprint arXiv:1911.05377","year":"2019","author":"Cheng Xinjing","key":"e_1_3_2_2_8_1"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.257"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_12_1","unstructured":"David Eigen Christian Puhrsch and Rob Fergus. 2014. Depth map prediction from a single image using a multi-scale deep network. In Advances in neural information processing systems. 2366--2374.  David Eigen Christian Puhrsch and Rob Fergus. 2014. Depth map prediction from a single image using a multi-scale deep network. In Advances in neural information processing systems. 2366--2374."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"crossref","unstructured":"Jakob Engel Vladlen Koltun and Daniel Cremers. 2017. Direct sparse odometry. IEEE transactions on pattern analysis and machine intelligence Vol. 40 3 (2017) 611--625.  Jakob Engel Vladlen Koltun and Daniel Cremers. 2017. Direct sparse odometry. IEEE transactions on pattern analysis and machine intelligence Vol. 40 3 (2017) 611--625.","DOI":"10.1109\/TPAMI.2017.2658577"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2015.7353631"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2011.6130252"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_27"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00214"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1561\/9781601988379"},{"volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 4340--4349","year":"2016","author":"Gaidon Adrien","key":"e_1_3_2_2_19_1"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/2354409.2354978"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.699"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"Richard Hartley and Andrew Zisserman. 2003. Multiple view geometry in computer vision. Cambridge university press.  Richard Hartley and Andrew Zisserman. 2003. Multiple view geometry in computer vision. Cambridge university press.","DOI":"10.1017\/CBO9780511811685"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_25_1","unstructured":"Max Jaderberg Karen Simonyan Andrew Zisserman et almbox. 2015. Spatial transformer networks. In Advances in neural information processing systems. 2017--2025.  Max Jaderberg Karen Simonyan Andrew Zisserman et almbox. 2015. Spatial transformer networks. In Advances in neural information processing systems. 2017--2025."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Hualie Jiang Laiyan Ding and Rui Huang. 2020. DiPE: Deeper into Photometric Errors for Unsupervised Learning of Depth and Ego-motion from Monocular Videos. arXiv: Computer Vision and Pattern Recognition (2020).  Hualie Jiang Laiyan Ding and Rui Huang. 2020. DiPE: Deeper into Photometric Errors for Unsupervised Learning of Depth and Ego-motion from Monocular Videos. arXiv: Computer Vision and Pattern Recognition (2020).","DOI":"10.1109\/IROS45743.2020.9341074"},{"key":"e_1_3_2_2_27_1","volume-title":"Proceedings Eighth IEEE International Conference on Computer Vision. ICCV","volume":"1","author":"Jin Hailin","year":"2001"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1350-0775.2004.00457.x"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2316835"},{"volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","year":"2014","author":"Kingma Diederik P","key":"e_1_3_2_2_30_1"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_43"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.238"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.19"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2016.32"},{"volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 1119--1127","year":"2015","author":"Li Bo","key":"e_1_3_2_2_35_1"},{"volume-title":"Robust and Accurate Hybrid Structure-From-Moti. In 2019 IEEE International Conference on Image Processing (ICIP). IEEE, 494--498","year":"2019","author":"Li Rui","key":"e_1_3_2_2_36_1"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539823"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.97"},{"key":"e_1_3_2_2_39_1","unstructured":"Chenxu Luo Zhenheng Yang Peng Wang Yang Wang Wei Xu Ram Nevatia and Alan Yuille. 2018. Every pixel counts: Joint learning of geometry and motion with 3d holistic understanding. arXiv preprint arXiv:1810.06125 (2018).  Chenxu Luo Zhenheng Yang Peng Wang Yang Wang Wei Xu Ram Nevatia and Alan Yuille. 2018. Every pixel counts: Joint learning of geometry and motion with 3d holistic understanding. arXiv preprint arXiv:1810.06125 (2018)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00594"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2011.6130280"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"crossref","unstructured":"Raul Mur-Artal Jose Maria Martinez Montiel and Juan D Tardos. 2015. ORB-SLAM: a versatile and accurate monocular SLAM system. IEEE transactions on robotics Vol. 31 5 (2015) 1147--1163.  Raul Mur-Artal Jose Maria Martinez Montiel and Juan D Tardos. 2015. ORB-SLAM: a versatile and accurate monocular SLAM system. IEEE transactions on robotics Vol. 31 5 (2015) 1147--1163.","DOI":"10.1109\/TRO.2015.2463671"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00281"},{"key":"e_1_3_2_2_44_1","unstructured":"Vaishakh Patil Wouter Van Gansbeke Dengxin Dai and Luc Van Gool. 2020. Don't Forget The Past: Recurrent Depth Estimation from Monocular Video. arXiv preprint arXiv:2001.02613 (2020).  Vaishakh Patil Wouter Van Gansbeke Dengxin Dai and Luc Van Gool. 2020. Don't Forget The Past: Recurrent Depth Estimation from Monocular Video. arXiv preprint arXiv:2001.02613 (2020)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793621"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"crossref","unstructured":"Andrea Pilzer St\u00e9phane Lathuili\u00e8re Dan Xu Mihai Marian Puscas Elisa Ricci and Nicu Sebe. 2019. Progressive Fusion for Unsupervised Binocular Depth Estimation using Cycled Networks. IEEE Transactions on Pattern Analysis and Machine Intelligence (2019).  Andrea Pilzer St\u00e9phane Lathuili\u00e8re Dan Xu Mihai Marian Puscas Elisa Ricci and Nicu Sebe. 2019. Progressive Fusion for Unsupervised Binocular Depth Estimation using Cycled Networks. IEEE Transactions on Pattern Analysis and Machine Intelligence (2019).","DOI":"10.1109\/TPAMI.2019.2942928"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2018.00045"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01252"},{"key":"e_1_3_2_2_49_1","unstructured":"Ashutosh Saxena Sung H Chung and Andrew Y Ng. 2006. Learning depth from single monocular images. In Advances in neural information processing systems. 1161--1168.  Ashutosh Saxena Sung H Chung and Andrew Y Ng. 2006. Learning depth from single monocular images. In Advances in neural information processing systems. 1161--1168."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"crossref","unstructured":"Ashutosh Saxena Min Sun and Andrew Y Ng. 2008. Make3d: Learning 3d scene structure from a single still image. IEEE transactions on pattern analysis and machine intelligence Vol. 31 5 (2008) 824--840.  Ashutosh Saxena Min Sun and Andrew Y Ng. 2008. Make3d: Learning 3d scene structure from a single still image. IEEE transactions on pattern analysis and machine intelligence Vol. 31 5 (2008) 824--840.","DOI":"10.1109\/TPAMI.2008.132"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.445"},{"volume-title":"Beyond Photometric Loss for Self-Supervised Ego-Motion Estimation. In International Conference on Robotics and Automation. IEEE.","year":"2019","author":"Shen Tianwei","key":"e_1_3_2_2_52_1"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"crossref","unstructured":"Tianwei Shen Siyu Zhu Tian Fang Runze Zhang and Long Quan. 2016. Graph-Based Consistent Matching for Structure-from-Motion. (2016) 139--155.  Tianwei Shen Siyu Zhu Tian Fang Runze Zhang and Long Quan. 2016. Graph-Based Consistent Matching for Structure-from-Motion. (2016) 139--155.","DOI":"10.1007\/978-3-319-46487-9_9"},{"key":"e_1_3_2_2_54_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014).  Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1080\/01972240903028714"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1063\/1.5120605"},{"key":"e_1_3_2_2_57_1","unstructured":"Jie Tang Fei-Peng Tian Wei Feng Jian Li and Ping Tan. 2019. Learning guided convolutional network for depth completion. arXiv preprint arXiv:1908.01238 (2019).  Jie Tang Fei-Peng Tian Wei Feng Jian Li and Ping Tan. 2019. Learning guided convolutional network for depth completion. arXiv preprint arXiv:1908.01238 (2019)."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"crossref","unstructured":"Alessio Tonioni Matteo Poggi Stefano Mattoccia and Luigi Di Stefano. 2019. Unsupervised Domain Adaptation for Depth Prediction from Images. IEEE Transactions on Pattern Analysis and Machine Intelligence (2019).  Alessio Tonioni Matteo Poggi Stefano Mattoccia and Luigi Di Stefano. 2019. Unsupervised Domain Adaptation for Depth Prediction from Images. IEEE Transactions on Pattern Analysis and Machine Intelligence (2019).","DOI":"10.1109\/TPAMI.2019.2940948"},{"volume-title":"Sfm-net: Learning of structure and motion from video. arXiv preprint arXiv:1704.07804","year":"2017","author":"Vijayanarasimhan Sudheendra","key":"e_1_3_2_2_59_1"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00216"},{"volume-title":"Unsupervised Learning of Monocular Depth and Ego-Motion Using Multiple Masks. In 2019 International Conference on Robotics and Automation (ICRA). IEEE, 4724--4730","year":"2019","author":"Wang Guangming","key":"e_1_3_2_2_61_1"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.421"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00826"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"crossref","unstructured":"Zhou Wang Alan C Bovik Hamid R Sheikh and Eero P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE transactions on image processing Vol. 13 4 (2004) 600--612.  Zhou Wang Alan C Bovik Hamid R Sheikh and Eero P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE transactions on image processing Vol. 13 4 (2004) 600--612.","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_2_2_65_1","unstructured":"Yiran Wu Sihao Ying and Lianmin Zheng. 2018. Size-to-depth: a new perspective for single image depth estimation. arXiv preprint arXiv:1801.04461 (2018).  Yiran Wu Sihao Ying and Lianmin Zheng. 2018. Size-to-depth: a new perspective for single image depth estimation. arXiv preprint arXiv:1801.04461 (2018)."},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"crossref","unstructured":"Nan Yang Lukas Von Stumberg Rui Wang and Daniel Cremers. 2020. D3VO: Deep Depth Deep Pose and Deep Uncertainty for Monocular Visual Odometry. arXiv: Computer Vision and Pattern Recognition (2020).  Nan Yang Lukas Von Stumberg Rui Wang and Daniel Cremers. 2020. D3VO: Deep Depth Deep Pose and Deep Uncertainty for Monocular Visual Odometry. arXiv: Computer Vision and Pattern Recognition (2020).","DOI":"10.1109\/CVPR42600.2020.00136"},{"key":"e_1_3_2_2_67_1","first-page":"2878","article-title":"Challenges in Monocular Visual Odometry: Photometric Calibration","volume":"3","author":"Yang Nan","year":"2018","journal-title":"Motion Bias and Rolling Shutter Effect."},{"key":"e_1_3_2_2_68_1","unstructured":"Zhenheng Yang Peng Wang Wei Xu Liang Zhao and Ramakant Nevatia. 2017. Unsupervised learning of geometry with edge-aware depth-normal consistency. arXiv preprint arXiv:1711.03665 (2017).  Zhenheng Yang Peng Wang Wei Xu Liang Zhao and Ramakant Nevatia. 2017. Unsupervised learning of geometry with edge-aware depth-normal consistency. arXiv preprint arXiv:1711.03665 (2017)."},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00212"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0028345"},{"key":"e_1_3_2_2_71_1","unstructured":"Christopher Zach Manfred Klopschitz and Marc Pollefeys. 2010. Disambiguating visual relations using loop constraints. (2010) 1426--1433.  Christopher Zach Manfred Klopschitz and Marc Pollefeys. 2010. Disambiguating visual relations using loop constraints. (2010) 1426--1433."},{"key":"e_1_3_2_2_72_1","unstructured":"Chaoqiang Zhao Qiyu Sun Chongzhen Zhang Yang Tang and Feng Qian. 2020. Monocular Depth Estimation Based On Deep Learning: An Overview. arXiv preprint arXiv:2003.06620 (2020).  Chaoqiang Zhao Qiyu Sun Chongzhen Zhang Yang Tang and Feng Qian. 2020. Monocular Depth Estimation Based On Deep Learning: An Overview. arXiv preprint arXiv:2003.06620 (2020)."},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.196"},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.700"},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.52"},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_3"}],"event":{"name":"MM '20: The 28th ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Seattle WA USA","acronym":"MM '20"},"container-title":["Proceedings of the 28th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394171.3413706","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3394171.3413706","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:01:16Z","timestamp":1750197676000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3394171.3413706"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,12]]},"references-count":76,"alternative-id":["10.1145\/3394171.3413706","10.1145\/3394171"],"URL":"https:\/\/doi.org\/10.1145\/3394171.3413706","relation":{},"subject":[],"published":{"date-parts":[[2020,10,12]]},"assertion":[{"value":"2020-10-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}