{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,11]],"date-time":"2025-03-11T04:31:59Z","timestamp":1741667519215,"version":"3.38.0"},"reference-count":242,"publisher":"SPIE-Intl Soc Optical Eng","issue":"02","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Electron. Imag."],"published-print":{"date-parts":[[2025,3,10]]},"DOI":"10.1117\/1.jei.34.2.020901","type":"journal-article","created":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T04:59:01Z","timestamp":1741582741000},"source":"Crossref","is-referenced-by-count":0,"title":["Review of monocular depth estimation methods"],"prefix":"10.1117","volume":"34","author":[{"given":"Zhimin","family":"Zhang","sequence":"first","affiliation":[{"name":"Luoyang Normal University, College of Information Technology, Luoyang, China"}]},{"given":"Yongxin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Luoyang Normal University, College of Information Technology, Luoyang, China"}]},{"given":"Yun","family":"Li","sequence":"additional","affiliation":[{"name":"Northeastern University, College of Information Science and Engineering, Shenyang, China"}]},{"given":"Linli","family":"Wu","sequence":"additional","affiliation":[{"name":"Luoyang Normal University, College of Information Technology, Luoyang, China"}]}],"member":"189","reference":[{"key":"r1","doi-asserted-by":"crossref","DOI":"10.1109\/ICB2018.2018.00031","article-title":"Improving 2D face recognition via discriminative face depth estimation","author":"Cui","year":"2018"},{"key":"r2","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-016-1360-0"},{"article-title":"Augmented reality for depth cues in monocular minimally invasive surgery","year":"2017","author":"Chen","key":"r3"},{"key":"r4","doi-asserted-by":"publisher","DOI":"10.1007\/11866565_45"},{"key":"r5","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2016.7532309"},{"key":"r6","doi-asserted-by":"publisher","DOI":"10.1109\/VR.2017.7892247"},{"key":"r7","doi-asserted-by":"publisher","DOI":"10.1145\/2601097.2601134"},{"key":"r8","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2011.5995316","article-title":"Real-time human pose recognition in parts from single depth images","author":"Shotton","year":"2011"},{"key":"r9","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2002.802926"},{"key":"r10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8462833"},{"key":"r11","doi-asserted-by":"crossref","DOI":"10.1109\/IROS.2004.1389947","article-title":"3D modeling of indoor environments by a mobile robot with a laser scanner and panoramic camera","author":"Biber","year":"2004"},{"key":"r12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2003.1211354"},{"key":"r13","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2013.2265378"},{"key":"r14","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.172"},{"key":"r15","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2010.2101060"},{"key":"r16","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-10520-3_21","article-title":"Dense depth maps from low resolution time-of-flight depth and high resolution color views","author":"Bartczak","year":"2009"},{"key":"r17","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52729.2023.02099","article-title":"Iterative geometry encoding volume for stereo matching","author":"Xu","year":"2023"},{"key":"r18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00134"},{"article-title":"Single-image depth perception in the wild","year":"2016","author":"Chen","key":"r19"},{"key":"r20","first-page":"1863","article-title":"Learning monocular depth in dynamic scenes via instance-aware projection consistency","author":"Lee","year":"2021"},{"key":"r21","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-61123-1_183"},{"year":"1970","author":"Horn","key":"r22"},{"key":"r23","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(81)90019-9"},{"key":"r24","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1983.4767367"},{"key":"r25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1997.609323"},{"key":"r26","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2016.614","article-title":"Efficient deep learning for stereo matching","author":"Luo","year":"2016"},{"key":"r27","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.12.089"},{"article-title":"Learning depth from single monocular images","year":"2005","author":"Saxena","key":"r28"},{"key":"r29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"r30","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2016.350","article-title":"The cityscapes dataset for semantic urban scene understanding","author":"Cordts","year":"2016"},{"key":"r31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"r32","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2016.438","article-title":"A large dataset to train convolutional networks for disparity, optical flow, and scene flow estimation","author":"Mayer","year":"2016"},{"key":"r33","first-page":"2366","article-title":"Depth map prediction from a single image using a multi-scale deep network","author":"Eigen","year":"2014"},{"key":"r34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2007.4408828"},{"key":"r35","first-page":"953","article-title":"Analysis and evaluation of several typical SFS algorithms","author":"Liao","year":"2001"},{"key":"r36","first-page":"39","article-title":"Estimation of illuminant direction, albedo, and shape from shading","author":"Zheng","year":"2002"},{"key":"r37","doi-asserted-by":"publisher","DOI":"10.1109\/34.784284"},{"key":"r38","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.1992.223150","article-title":"A simple algorithm for shape from shading","author":"Bichsel","year":"1992"},{"key":"r39","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1984.4767501"},{"article-title":"Estimation of illuminant direction, Albedo, and shape from shading","year":"2002","author":"Zheng","key":"r40"},{"key":"r41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.1999.790410"},{"key":"r42","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"r43","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2007.09.014"},{"key":"r44","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2005.177","article-title":"Histograms of oriented gradients for human detection","author":"Dalal","year":"2005"},{"key":"r45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126544"},{"article-title":"Features from accelerated segment test (fast)","year":"2009","author":"Viswanathan","key":"r46"},{"key":"r47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_27"},{"key":"r48","doi-asserted-by":"publisher","DOI":"10.1145\/2001269.2001293"},{"key":"r49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33709-3_4"},{"key":"r50","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10578-9_4"},{"key":"r51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298703"},{"key":"r52","doi-asserted-by":"publisher","DOI":"10.1109\/34.601246"},{"key":"r53","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2016.445","article-title":"Structure-from-motion revisited","author":"Schonberger","year":"2016"},{"article-title":"Reconstructing the world in six days","year":"2015","author":"Jared","key":"r54"},{"key":"r55","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2016.31"},{"key":"r56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539782"},{"key":"r57","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV.2015.98","article-title":"Optimizing the viewing graph for structure-from-motion","author":"Sweeney","year":"2015"},{"key":"r58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10578-9_5"},{"key":"r59","first-page":"440","article-title":"The perception of the visual world","author":"Gibson","year":"1951"},{"key":"r60","doi-asserted-by":"crossref","DOI":"10.5244\/C.19.8","article-title":"Shape from non-homogeneous, non-stationary, anisotropic, perspective texture","author":"Loh","year":"2005"},{"key":"r61","first-page":"345","article-title":"Shape from texture","author":"Aloimonos","year":"1988"},{"key":"r62","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(89)90066-0"},{"article-title":"Shape from contour","year":"1980","author":"Witkin","key":"r63"},{"key":"r64","first-page":"2","article-title":"Recovering intrinsic scene characteristics","author":"Barrow","year":"1978"},{"key":"r65","doi-asserted-by":"publisher","DOI":"10.1364\/AOP.3.000128"},{"key":"r66","doi-asserted-by":"crossref","DOI":"10.1109\/ICIP.2002.1040012","article-title":"Hmm-based surface reconstruction from single images","author":"Nagai","year":"2002"},{"key":"r67","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2014.2303162"},{"key":"r68","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2007.383191","article-title":"Learning conditional random fields for stereo","author":"Scharstein","year":"2007"},{"key":"r69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539823"},{"key":"r70","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2011.5995551","article-title":"Repetition-based dense single-view reconstruction","author":"Wu","year":"2011"},{"key":"r71","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0071-y"},{"key":"r72","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2316835"},{"key":"r73","doi-asserted-by":"publisher","DOI":"10.1023\/A:1011139631724"},{"key":"r74","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.147"},{"key":"r75","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_56"},{"key":"r76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2012.6238903"},{"key":"r77","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2014.97","article-title":"Discrete-continuous depth estimation from a single image","author":"Liu","year":"2014"},{"key":"r78","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2016.594","article-title":"Monocular depth estimation using neural regression forest","author":"Roy","year":"2016"},{"key":"r79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00281"},{"article-title":"Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture","year":"2014","author":"Eigen","key":"r80"},{"key":"r81","doi-asserted-by":"crossref","DOI":"10.1109\/3DV.2016.32","article-title":"Deeper depth prediction with fully convolutional residual networks","author":"Laina","year":"2016"},{"key":"r82","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2015.7298965","article-title":"Fully convolutional networks for semantic segmentation","author":"Long","year":"2015"},{"key":"r83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00069"},{"key":"r84","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV.2019.00578","article-title":"Enforcing geometric constraints of virtual normal for depth prediction","author":"Yin","year":"2019"},{"key":"r85","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52729.2023.01391","article-title":"Revealing the dark secrets of masked image modeling","author":"Xie","year":"2023"},{"article-title":"Very deep convolutional networks for large-scale image recognition","year":"2014","author":"Simonyan","key":"r86"},{"key":"r87","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"r88","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2821979"},{"key":"r89","doi-asserted-by":"crossref","DOI":"10.1109\/CVPRW53098.2021.00288","article-title":"Fast and accurate single-image depth estimation on mobile devices, mobile AI 2021 challenge: report","author":"Ignatov","year":"2021"},{"key":"r90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01203"},{"key":"r91","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2836318"},{"key":"r92","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00214","article-title":"Deep ordinal regression network for monocular depth estimation","author":"Fu","year":"2018"},{"key":"r93","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01219-9_14","article-title":"Monocular depth estimation with affinity, vertical pooling, and label enhancement","author":"Gan","year":"2018"},{"article-title":"Depth from a single image by harmonizing overcomplete local network predictions","year":"2016","author":"Chakrabarti","key":"r94"},{"key":"r95","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560885"},{"key":"r96","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-023-1458-0"},{"key":"r97","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52688.2022.00166","article-title":"P3Depth: monocular depth estimation with a piecewise planarity prior","author":"Patil","year":"2022"},{"article-title":"Va-DepthNet: a variational approach to single image depth prediction","year":"2023","author":"Liu","key":"r98"},{"key":"r99","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01822"},{"key":"r100","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2017.2657002"},{"key":"r101","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00066"},{"article-title":"Convolutional LSTM network: a machine learning approach for precipitation nowcasting","year":"2015","author":"Xingjian","key":"r102"},{"key":"r103","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.317"},{"key":"r104","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR46437.2021.00145","article-title":"Depth from camera motion and object detection","author":"Griffin","year":"2021"},{"key":"r105","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2022.3165860"},{"key":"r106","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV.2015.52","article-title":"Learning ordinal relationships for mid-level vision","author":"Zoran","year":"2015"},{"key":"r107","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00996"},{"key":"r108","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2015.7299152","article-title":"Deep convolutional neural fields for depth estimation from a single image","author":"Liu","year":"2015"},{"key":"r109","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00412","article-title":"Structured attention guided convolutional neural fields for monocular depth estimation","author":"Xu","year":"2018"},{"key":"r110","first-page":"1426","article-title":"Monocular depth estimation using multi-scale continuous CRFs as sequential deep networks","author":"Ricci","year":"2018"},{"article-title":"Depth and surface normal estimation from monocular images using regression on deep features and hierarchical CRFs","year":"2015","author":"Li","key":"r111"},{"key":"r112","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2740321"},{"key":"r113","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00389"},{"key":"r114","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298897"},{"key":"r115","doi-asserted-by":"crossref","DOI":"10.1109\/3DV.2016.69","article-title":"Joint semantic segmentation and depth estimation with deep convolutional networks","author":"Mousavian","year":"2016"},{"key":"r116","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00400"},{"key":"r117","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3416065"},{"key":"r118","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52729.2023.02057","article-title":"iDISC: internal discretization for monocular depth estimation","author":"Piccinelli","year":"2023"},{"key":"r119","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20870-7_41"},{"key":"r120","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00114"},{"key":"r121","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-58574-7_35","article-title":"Guiding monocular depth estimation using depth-attention volume","author":"Huynh","year":"2020"},{"key":"r122","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV48922.2021.01196","article-title":"Vision transformers for dense prediction","author":"Ranftl","year":"2021"},{"key":"r123","doi-asserted-by":"publisher","DOI":"10.3390\/s22103849"},{"key":"r124","doi-asserted-by":"crossref","DOI":"10.1109\/HPCC-DSS-SmartCity-DependSys57074.2022.00271","article-title":"Depth monocular estimation with attention-based encoder-decoder network from single image","author":"Zhang","year":"2022"},{"key":"r125","doi-asserted-by":"crossref","DOI":"10.1109\/WACV51458.2022.00242","article-title":"Edgeconv with attention module for monocular depth estimation","author":"Lee","year":"2022"},{"key":"r126","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9897187"},{"key":"r127","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV48922.2021.00986","article-title":"Swin Transformer: hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"r128","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-020-01251-y"},{"key":"r129","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00581"},{"key":"r130","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104520"},{"article-title":"SideRT: a real-time pure transformer architecture for single image depth estimation","year":"2022","author":"Shu","key":"r131"},{"key":"r132","doi-asserted-by":"publisher","DOI":"10.1109\/ICICCSP53532.2022.9862348"},{"key":"r133","first-page":"2672","article-title":"Generative adversarial nets","author":"Goodfellow","year":"2014"},{"key":"r134","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.2973068"},{"key":"r135","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-018-6694-x"},{"key":"r136","doi-asserted-by":"crossref","DOI":"10.1109\/ICIP.2017.8296575","article-title":"Depth prediction from a single image with conditional adversarial networks","author":"Jung","year":"2017"},{"key":"r137","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00163"},{"key":"r138","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3060435"},{"key":"r139","doi-asserted-by":"publisher","DOI":"10.3390\/electronics12051189"},{"article-title":"Stylegan knows normal, depth, Albedo, and more","year":"2024","author":"Bhattad","key":"r140"},{"key":"r141","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-023-0350-8"},{"key":"r142","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00014"},{"key":"r143","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52733.2024.00927","article-title":"WorDepth: variational language prior for monocular depth estimation","author":"Zeng","year":"2024"},{"article-title":"Monocular depth estimation using diffusion models","year":"2023","author":"Saxena","key":"r144"},{"key":"r145","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"r146","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2017.699","article-title":"Unsupervised monocular depth estimation with left-right consistency","author":"Godard","year":"2017"},{"article-title":"Spatial transformer networks","year":"2015","author":"Jaderberg","key":"r147"},{"key":"r148","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2018.00045"},{"key":"r149","doi-asserted-by":"crossref","DOI":"10.1109\/CVPRW.2019.00348","article-title":"Learn stereo, infer mono: Siamese networks for self-supervised, monocular, depth estimation","author":"Goldman","year":"2019"},{"key":"r150","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2019.00579","article-title":"Bilateral cyclic constraint and adaptive regularization for unsupervised monocular depth prediction","author":"Wong","year":"2019"},{"key":"r151","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01003"},{"key":"r152","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52729.2023.02052","article-title":"PlaneDepth: self-supervised depth estimation via orthogonal planes","author":"Wang","year":"2023"},{"key":"r153","doi-asserted-by":"crossref","DOI":"10.1109\/WACV56688.2023.00573","article-title":"Self-supervised monocular depth estimation: solving the edge-fattening problem","author":"Chen","year":"2023"},{"key":"r154","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00871"},{"key":"r155","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2017.700","article-title":"Unsupervised learning of depth and ego-motion from video","author":"Zhou","year":"2017"},{"key":"r156","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV.2019.00393","article-title":"Digging into self-supervised monocular depth estimation","author":"Godard","year":"2019"},{"article-title":"Unsupervised learning of geometry with edge-aware depth-normal consistency","year":"2017","author":"Yang","key":"r157"},{"key":"r158","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00594","article-title":"Unsupervised learning of depth and ego-motion from monocular video using 3D geometric constraints","author":"Mahjourian","year":"2018"},{"article-title":"SFM-Net: learning of structure and motion from video","year":"2017","author":"Vijayanarasimhan","key":"r159"},{"key":"r160","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01484-6"},{"key":"r161","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2017.596","article-title":"Demon: depth and motion network for learning monocular stereo","author":"Ummenhofer","year":"2017"},{"article-title":"BA-Net: dense bundle adjustment network","year":"2018","author":"Tang","key":"r162"},{"key":"r163","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00216","article-title":"Learning depth from monocular videos using direct methods","author":"Wang","year":"2018"},{"key":"r164","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461251"},{"key":"r165","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00043","article-title":"Unsupervised learning of monocular depth estimation and visual odometry with deep feature reconstruction","author":"Zhan","year":"2018"},{"key":"r166","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794425"},{"key":"r167","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR42600.2020.00136","article-title":"D3vo: deep depth, deep pose and deep uncertainty for monocular visual odometry","author":"Yang","year":"2020"},{"key":"r168","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561508"},{"key":"r169","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01237-3_50","article-title":"Deep virtual stereo odometry: leveraging deep depth prediction for monocular direct sparse odometry","author":"Yang","year":"2018"},{"key":"r170","doi-asserted-by":"publisher","DOI":"10.3390\/s21216956"},{"key":"r171","doi-asserted-by":"crossref","DOI":"10.1109\/ICIP42928.2021.9506510","article-title":"Attention-based self-supervised learning monocular depth estimation with edge refinement","author":"Jiang","year":"2021"},{"key":"r172","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3074306"},{"key":"r173","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00668"},{"key":"r174","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2022.01.016"},{"key":"r175","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3210298"},{"article-title":"Attention is all you need","year":"2017","author":"Vaswani","key":"r176"},{"key":"r177","doi-asserted-by":"crossref","DOI":"10.5220\/0010884000003124","article-title":"Transformers in self-supervised monocular depth estimation with unknown camera intrinsics","author":"Varma","year":"2022"},{"key":"r178","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52729.2023.01778","article-title":"Lite-Mono: a lightweight CNN and transformer architecture for self-supervised monocular depth estimation","author":"Zhang","year":"2023"},{"key":"r179","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3196781"},{"key":"r180","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.10.073"},{"key":"r181","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-8546-3_36"},{"key":"r182","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR42600.2020.00329","article-title":"On the uncertainty of self-supervised monocular depth estimation","author":"Poggi","year":"2020"},{"key":"r183","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR42600.2020.00256","article-title":"3D packing for self-supervised monocular depth estimation","author":"Guizilini","year":"2020"},{"key":"r184","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00481"},{"key":"r185","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3275584"},{"key":"r186","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV51070.2023.01485","article-title":"GasMono: geometry-aided self-supervised monocular depth estimation for indoor scenes","author":"Zhao","year":"2023"},{"article-title":"SurroundDepth: entangling surrounding views for self-supervised multi-camera depth estimation","year":"2023","author":"Wei","key":"r187"},{"key":"r188","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV.2019.00294","article-title":"Sequential adversarial learning for self-supervised deep visual odometry","author":"Li","year":"2019"},{"key":"r189","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793512"},{"key":"r190","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR46437.2021.00122","article-title":"The temporal opportunist: self-supervised multi-frame monocular depth","author":"Watson","year":"2021"},{"key":"r191","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR46437.2021.00605","article-title":"MonoRec: semi-supervised dense reconstruction in dynamic environments from a single moving camera","author":"Wimbauer","year":"2021"},{"key":"r192","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_33"},{"article-title":"MonoFormer: towards generalization of self-supervised monocular depth estimation with transformers","year":"2022","author":"Bae","key":"r193"},{"key":"r194","doi-asserted-by":"crossref","DOI":"10.1109\/3DV57658.2022.00077","article-title":"MonoViT: self-supervised monocular depth estimation with a vision transformer","author":"Zhao","year":"2022"},{"key":"r195","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV51070.2023.00818","article-title":"Self-supervised monocular depth estimation: let\u2019s talk about the weather","author":"Saunders","year":"2023"},{"key":"r196","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v38i6.28383","article-title":"SQLDepth: generalizable self-supervised fine-structured monocular depth estimation","author":"Wang","year":"2024"},{"article-title":"MonoDVPS: a self-supervised monocular depth estimation approach to depth-aware video panoptic segmentation","year":"2022","author":"Petrovai","key":"r197"},{"key":"r198","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00019"},{"key":"r199","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00212","article-title":"GeoNet: unsupervised learning of dense depth, optical flow and camera pose","author":"Yin","year":"2018"},{"key":"r200","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00716"},{"key":"r201","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00051"},{"key":"r202","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00062"},{"key":"r203","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV48922.2021.00840","article-title":"Domain adaptive semantic segmentation with self-supervised depth estimation","author":"Wang","year":"2021"},{"key":"r204","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20893-6_19"},{"key":"r205","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01249-6_15","article-title":"Joint task-recursive learning for semantic segmentation and depth estimation","author":"Zhang","year":"2018"},{"key":"r206","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2019.00273","article-title":"Towards scene understanding: unsupervised monocular depth estimation with semantic-aware representation","author":"Chen","year":"2019"},{"key":"r207","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58565-5_35"},{"key":"r208","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-023-01799-6"},{"key":"r209","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109297"},{"key":"r210","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110770"},{"key":"r211","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2023.103753"},{"key":"r212","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01718-1"},{"key":"r213","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52733.2024.01885","article-title":"SelfOcc: self-supervised vision-based 3D occupancy prediction","author":"Huang","year":"2024"},{"key":"r214","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01252"},{"key":"r215","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01228-1_3","article-title":"DF-Net: unsupervised joint learning of depth and flow using cross-task consistency","author":"Zou","year":"2018"},{"key":"r216","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3145057"},{"key":"r217","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-023-11325-x"},{"article-title":"FlowDepth: decoupling optical flow for self-supervised monocular depth estimation","year":"2024","author":"Sun","key":"r218"},{"article-title":"Sparse-to-continuous: enhancing monocular depth estimation using occupancy maps","year":"2019","author":"dos Santos Rosa","key":"r219"},{"key":"r220","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2019.00343","article-title":"DeepLiDAR: deep surface normal guided depth prediction for outdoor scene from sparse lidar data and single color image","author":"Qiu","year":"2019"},{"key":"r221","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2017.238","article-title":"Semi-supervised deep learning for monocular depth map prediction","author":"Kuznietsov","year":"2017"},{"key":"r222","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01225-0_11","article-title":"Estimating depth from rgb and sparse sensing","author":"Chen","year":"2018"},{"key":"r223","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00024","article-title":"Single view stereo matching","author":"Luo","year":"2018"},{"article-title":"Robust semi-supervised monocular depth estimation with reprojected distances","year":"2020","author":"Guizilini","key":"r224"},{"key":"r225","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3061343"},{"article-title":"DiffusionDepth: diffusion denoising approach for monocular depth estimation","year":"2023","author":"Duan","key":"r226"},{"key":"r227","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2857703"},{"key":"r228","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP42928.2021.9506550"},{"key":"r229","doi-asserted-by":"crossref","DOI":"10.1109\/ICIP42928.2021.9506550","article-title":"Depth estimation from monocular images and sparse radar using deep ordinal regression network","author":"Lo","year":"2021"},{"article-title":"Depth estimation via sparse radar prior and driving scene semantics","year":"2022","author":"Zheng","key":"r230"},{"key":"r231","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR52729.2023.00895","article-title":"Depth estimation from camera image and mmWave radar point cloud","author":"Singh","year":"2023"},{"key":"r232","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2022.103601"},{"article-title":"T2Net: synthetic-to-realistic translation for solving single-image depth estimation tasks","year":"2018","author":"Zheng","key":"r233"},{"key":"r234","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00339"},{"article-title":"CyCADA: cycle-consistent adversarial domain adaptation","year":"2018","author":"Hoffman","key":"r235"},{"key":"r236","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00037"},{"key":"r237","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2018.00212","article-title":"Geonet: unsupervised learning of dense depth, optical flow and camera pose","author":"Yin","year":"2018"},{"key":"r238","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2896963"},{"article-title":"Unsupervised monocular depth learning in dynamic scenes","year":"2010","author":"Li","key":"r239"},{"key":"r240","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-022-0279-3"},{"key":"r241","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2022.3199265"},{"key":"r242","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611100"}],"container-title":["Journal of Electronic Imaging"],"original-title":[],"deposited":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T19:52:29Z","timestamp":1741636349000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.spiedigitallibrary.org\/journals\/journal-of-electronic-imaging\/volume-34\/issue-02\/020901\/Review-of-monocular-depth-estimation-methods\/10.1117\/1.JEI.34.2.020901.full"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,10]]},"references-count":242,"journal-issue":{"issue":"02","published-online":{"date-parts":[[2025,3,1]]}},"URL":"https:\/\/doi.org\/10.1117\/1.jei.34.2.020901","relation":{},"ISSN":["1017-9909"],"issn-type":[{"type":"print","value":"1017-9909"}],"subject":[],"published":{"date-parts":[[2025,3,10]]}}}