{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:16:31Z","timestamp":1755839791747,"version":"3.37.3"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T00:00:00Z","timestamp":1733356800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T00:00:00Z","timestamp":1733356800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61976227","62176096","62076257"],"award-info":[{"award-number":["61976227","62176096","62076257"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s40747-024-01688-6","type":"journal-article","created":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T08:23:15Z","timestamp":1733386995000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Image depth estimation assisted by multi-view projection"],"prefix":"10.1007","volume":"11","author":[{"given":"Liman","family":"Liu","sequence":"first","affiliation":[]},{"given":"Jinshan","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Guansheng","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Siyuan","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5878-6016","authenticated-orcid":false,"given":"Chen","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Huaifei","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Wenbing","family":"Tao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,5]]},"reference":[{"key":"1688_CR1","doi-asserted-by":"crossref","unstructured":"Li B, Shen C, Dai Y, Van Den\u00a0Hengel A, He M (2015) Depth and surface normal estimation from monocular images using regression on deep features and hierarchical CRFs. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1119\u20131127","DOI":"10.1109\/CVPR.2015.7298715"},{"key":"1688_CR2","doi-asserted-by":"crossref","unstructured":"Knobelreiter P, Reinbacher C, Shekhovtsov A, Pock T (2017) End-to-end training of hybrid CNN-CRF models for stereo. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2339\u20132348","DOI":"10.1109\/CVPR.2017.159"},{"issue":"11","key":"1688_CR3","doi-asserted-by":"publisher","first-page":"3174","DOI":"10.1109\/TCSVT.2017.2740321","volume":"28","author":"Y Cao","year":"2017","unstructured":"Cao Y, Wu Z, Shen C (2017) Estimating depth from monocular images as classification using deep fully convolutional residual networks. IEEE Trans Circuits Syst Video Technol 28(11):3174\u20133182","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"1688_CR4","doi-asserted-by":"crossref","unstructured":"Chen H, Chen H-C, Sun C-H et al (2024) Apply fuzzy mask to improve monocular depth estimation. Int J Fuzzy Syst 26(4):1143\u20131157","DOI":"10.1007\/s40815-023-01657-0"},{"key":"1688_CR5","doi-asserted-by":"crossref","unstructured":"Han C, Lv C, Kou Q et al (2024) DCL-depth: monocular depth estimation network based on iam and depth consistency loss. Multimed Tools Appl 1\u201315","DOI":"10.1007\/s11042-024-18877-7"},{"key":"1688_CR6","doi-asserted-by":"crossref","unstructured":"Xu D, Ricci E, Ouyang W, Wang X, Sebe N (2017) Multi-scale continuous CRFs as sequential deep networks for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5354\u20135362","DOI":"10.1109\/CVPR.2017.25"},{"key":"1688_CR7","doi-asserted-by":"crossref","unstructured":"Yin W, Liu Y, Shen C, Yan Y (2019) Enforcing geometric constraints of virtual normal for depth prediction. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 5684\u20135693","DOI":"10.1109\/ICCV.2019.00578"},{"key":"1688_CR8","unstructured":"Lee JH, Han M-K, Ko DW, Suh IH (2019) From big to small: multi-scale local planar guidance for monocular depth estimation. arXiv preprint arXiv:1907.10326"},{"key":"1688_CR9","doi-asserted-by":"crossref","unstructured":"Huynh L, Nguyen-Ha P, Matas J, Rahtu E, Heikkil\u00e4 J (2020) Guiding monocular depth estimation using depth-attention volume. In: Computer vision\u2014ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, proceedings, Part XXVI 16, pp 581\u2013597","DOI":"10.1007\/978-3-030-58574-7_35"},{"issue":"21","key":"1688_CR10","doi-asserted-by":"publisher","first-page":"24804","DOI":"10.1007\/s10489-023-04851-7","volume":"53","author":"C Liu","year":"2023","unstructured":"Liu C, Zuo W, Yang G, Li W, Wen F, Zhang H, Zang T (2023) Relative order constraint for monocular depth estimation. Appl Intell 53(21):24804\u201324821","journal-title":"Appl Intell"},{"issue":"4","key":"1688_CR11","doi-asserted-by":"publisher","first-page":"6813","DOI":"10.1109\/LRA.2020.3017478","volume":"5","author":"V Patil","year":"2020","unstructured":"Patil V, Van Gansbeke W, Dai D, Van Gool L (2020) Don\u2019t forget the past: recurrent depth estimation from monocular video. IEEE Robot Autom Lett 5(4):6813\u20136820","journal-title":"IEEE Robot Autom Lett"},{"key":"1688_CR12","doi-asserted-by":"crossref","unstructured":"Zhang H, Shen C, Li Y, Cao Y, Liu Y, Yan Y (2019) Exploiting temporal consistency for real-time video depth estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1725\u20131734","DOI":"10.1109\/ICCV.2019.00181"},{"key":"1688_CR13","doi-asserted-by":"crossref","unstructured":"Wang R, Pizer SM, Frahm J-M (2019) Recurrent neural network for (un-)supervised learning of monocular video visual odometry and depth. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5555\u20135564","DOI":"10.1109\/CVPR.2019.00570"},{"issue":"8","key":"1688_CR14","doi-asserted-by":"publisher","first-page":"3701","DOI":"10.1007\/s00371-023-02995-8","volume":"39","author":"Y Li","year":"2023","unstructured":"Li Y, Luo F, Xiao C (2023) Monocular human depth estimation with 3D motion flow and surface normals. Vis Comput 39(8):3701\u20133713","journal-title":"Vis Comput"},{"issue":"6","key":"1688_CR15","doi-asserted-by":"publisher","first-page":"3920","DOI":"10.1109\/TII.2020.3011067","volume":"17","author":"L Li","year":"2020","unstructured":"Li L, Li X, Yang S, Ding S, Jolfaei A, Zheng X (2020) Unsupervised-learning-based continuous depth and motion estimation with monocular endoscopy for virtual reality minimally invasive surgery. IEEE Trans Ind Inf 17(6):3920\u20133928","journal-title":"IEEE Trans Ind Inf"},{"key":"1688_CR16","doi-asserted-by":"crossref","unstructured":"Wimbauer F, Yang N, Von\u00a0Stumberg L, Zeller N, Cremers D (2021) MonoRec: semi-supervised dense reconstruction in dynamic environments from a single moving camera. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6112\u20136122","DOI":"10.1109\/CVPR46437.2021.00605"},{"key":"1688_CR17","doi-asserted-by":"crossref","unstructured":"Liu C, Gu J, Kim K, Narasimhan SG, Kautz J (2019) Neural RGB(r)D sensing: depth and uncertainty from a video camera. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10986\u201310995","DOI":"10.1109\/CVPR.2019.01124"},{"key":"1688_CR18","unstructured":"Saxena A, Chung SH, Ng AY (2005) Learning depth from single monocular images. In: Proceedings of the 18th International Conference on Neural Information Processing Systems, pp 1161\u20131168"},{"key":"1688_CR19","unstructured":"Saxena A, Schulte J, Ng AY et al (2007) Depth estimation using monocular and stereo cues. In: IJCAI, vol 7, pp 2197\u20132203"},{"key":"1688_CR20","doi-asserted-by":"crossref","unstructured":"Hoiem D, Efros AA, Hebert M (2005) Automatic photo pop-up. In: ACM SIGGRAPH 2005 papers, pp 577\u2013584","DOI":"10.1145\/1186822.1073232"},{"key":"1688_CR21","doi-asserted-by":"crossref","unstructured":"Liu B, Gould S, Koller D (2010) Single image depth estimation from predicted semantic labels. In: 2010 IEEE Computer Society conference on computer vision and pattern recognition, pp 1253\u20131260","DOI":"10.1109\/CVPR.2010.5539823"},{"key":"1688_CR22","doi-asserted-by":"crossref","unstructured":"Ladicky L, Shi J, Pollefeys M (2014) Pulling things out of perspective. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 89\u201396","DOI":"10.1109\/CVPR.2014.19"},{"key":"1688_CR23","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. In: Proceedings of the 27th International Conference on Neural Information Processing Systems, pp 2366\u20132374"},{"key":"1688_CR24","doi-asserted-by":"crossref","unstructured":"Wang X, Fouhey D, Gupta A (2015) Designing deep networks for surface normal estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 539\u2013547","DOI":"10.1109\/CVPR.2015.7298652"},{"key":"1688_CR25","doi-asserted-by":"crossref","unstructured":"Fu H, Gong M, Wang C, Batmanghelich K, Tao D (2018) Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2002\u20132011","DOI":"10.1109\/CVPR.2018.00214"},{"key":"1688_CR26","doi-asserted-by":"crossref","unstructured":"Gan Y, Xu X, Sun W, Lin L (2018) Monocular depth estimation with affinity, vertical pooling, and label enhancement. In: Proceedings of the European conference on computer vision (ECCV), pp 224\u2013239","DOI":"10.1007\/978-3-030-01219-9_14"},{"issue":"4","key":"1688_CR27","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1688_CR28","doi-asserted-by":"crossref","unstructured":"Yin Z, Shi J (2018) GeoNet: unsupervised learning of dense depth, optical flow and camera pose. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1983\u20131992","DOI":"10.1109\/CVPR.2018.00212"},{"key":"1688_CR29","doi-asserted-by":"crossref","unstructured":"Mahjourian R, Wicke M, Angelova A (2018) Unsupervised learning of depth and ego-motion from monocular video using 3D geometric constraints. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5667\u20135675","DOI":"10.1109\/CVPR.2018.00594"},{"key":"1688_CR30","doi-asserted-by":"crossref","unstructured":"Wang C, Buenaposada JM, Zhu R, Lucey S (2018) Learning depth from monocular videos using direct methods. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2022\u20132030","DOI":"10.1109\/CVPR.2018.00216"},{"key":"1688_CR31","doi-asserted-by":"crossref","unstructured":"Watson J, Mac\u00a0Aodha O, Prisacariu V, Brostow G, Firman M (2021) The temporal opportunist: self-supervised multi-frame monocular depth. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1164\u20131174","DOI":"10.1109\/CVPR46437.2021.00122"},{"key":"1688_CR32","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1688_CR33","doi-asserted-by":"crossref","unstructured":"Teed Z, Deng J (2020) RAFT: recurrent all-pairs field transforms for optical flow. In: Computer vision\u2014ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, proceedings, Part II 16, pp. 402\u2013419","DOI":"10.1007\/978-3-030-58536-5_24"},{"issue":"4","key":"1688_CR34","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR, Simoncelli EP (2004) Image quality assessment: from error visibility to structural similarity. IEEE Trans Image Process 13(4):600\u2013612","journal-title":"IEEE Trans Image Process"},{"key":"1688_CR35","doi-asserted-by":"crossref","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? The KITTI vision benchmark suite. In: 2012 IEEE conference on computer vision and pattern recognition, pp 3354\u20133361","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"1688_CR36","doi-asserted-by":"crossref","unstructured":"Gaidon A, Wang Q, Cabon Y, Vig E (2016) Virtual worlds as proxy for multi-object tracking analysis. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4340\u20134349","DOI":"10.1109\/CVPR.2016.470"},{"key":"1688_CR37","doi-asserted-by":"crossref","unstructured":"Hu H, Yang B, Qiao Z, Liu S, Zhu J, Liu Z, Ding W, Zhao D, Wang H (2023) SeasonDepth: cross-season monocular depth prediction dataset and benchmark under multiple environments. In: 2023 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 11384\u201311389","DOI":"10.1109\/IROS55552.2023.10341917"},{"key":"1688_CR38","doi-asserted-by":"crossref","unstructured":"Uhrig J, Schneider N, Schneider L, Franke U, Brox T, Geiger A (2017) Sparsity invariant CNNs. In: 2017 international conference on 3D vision (3DV), pp 11\u201320","DOI":"10.1109\/3DV.2017.00012"},{"key":"1688_CR39","doi-asserted-by":"crossref","unstructured":"Garg R, Bg VK, Carneiro G, Reid I (2016) Unsupervised CNN for single view depth estimation: geometry to the rescue. In: Computer vision\u2014ECCV 2016: 14th European conference, Amsterdam, The Netherlands, October 11\u201314, 2016, proceedings, Part VIII 14, pp 740\u2013756","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1688_CR40","unstructured":"Kim D, Ka W, Ahn P, Joo D, Chun S, Kim J (2022) Global-local path networks for monocular depth estimation with vertical CutDepth. arXiv preprint arXiv:2201.07436"},{"key":"1688_CR41","doi-asserted-by":"crossref","unstructured":"Godard C, Mac\u00a0Aodha O, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 270\u2013279","DOI":"10.1109\/CVPR.2017.699"},{"key":"1688_CR42","doi-asserted-by":"crossref","unstructured":"Karimi F, Mehrpanah A, Rawassizadeh R (2022) LightDepth: a resource efficient depth estimation approach for dealing with ground truth sparsity via curriculum learning. arXiv preprint arXiv:2211.08608","DOI":"10.2139\/ssrn.4531792"},{"key":"1688_CR43","unstructured":"Oquab M, Darcet T, Moutakanni T, Vo H, Szafraniec M, Khalidov V, Fernandez P, Haziza D, Massa F, El-Nouby A et al (2023) DINOv2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193"},{"key":"1688_CR44","doi-asserted-by":"crossref","unstructured":"Yin W, Zhang C, Chen H, Cai Z, Yu G, Wang K, Chen X, Shen C (2023) Metric3D: towards zero-shot metric 3D prediction from a single image. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9043\u20139053","DOI":"10.1109\/ICCV51070.2023.00830"},{"key":"1688_CR45","doi-asserted-by":"crossref","unstructured":"Manimaran G, Swaminathan J (2022) Focal-WNet: an architecture unifying convolution and attention for depth estimation. In: 2022 IEEE 7th international conference for convergence in technology (I2CT), pp 1\u20137","DOI":"10.1109\/I2CT54291.2022.9824488"},{"key":"1688_CR46","doi-asserted-by":"crossref","unstructured":"Agarwal A, Arora C (2022) DepthFormer: multiscale vision transformer for monocular depth estimation with global local information fusion. In: 2022 IEEE international conference on image processing (ICIP), pp 3873\u20133877","DOI":"10.1109\/ICIP46576.2022.9897187"},{"key":"1688_CR47","doi-asserted-by":"crossref","unstructured":"Ke B, Obukhov A, Huang S, Metzger N, Daudt RC, Schindler K (2024) Repurposing diffusion-based image generators for monocular depth estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9492\u20139502","DOI":"10.1109\/CVPR52733.2024.00907"},{"key":"1688_CR48","unstructured":"Bhat SF, Alhashim I, Wonka P (2021) AdaBins: depth estimation using adaptive bins. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 4009\u20134018"},{"key":"1688_CR49","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et al (2020) An image is worth $$16 \\times 16$$ words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"1688_CR50","doi-asserted-by":"crossref","unstructured":"Ranftl R, Bochkovskiy A, Koltun V (2021) Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12179\u201312188","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"1688_CR51","doi-asserted-by":"crossref","unstructured":"Cheng S, Xu Z, Zhu S, Li Z, Li LE, Ramamoorthi R, Su H (2020) Deep stereo using adaptive thin volume representation with uncertainty awareness. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2524\u20132534","DOI":"10.1109\/CVPR42600.2020.00260"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01688-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-024-01688-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01688-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,30]],"date-time":"2025-01-30T20:17:06Z","timestamp":1738268226000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-024-01688-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,5]]},"references-count":51,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["1688"],"URL":"https:\/\/doi.org\/10.1007\/s40747-024-01688-6","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"type":"print","value":"2199-4536"},{"type":"electronic","value":"2198-6053"}],"subject":[],"published":{"date-parts":[[2024,12,5]]},"assertion":[{"value":"29 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"66"}}