{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:18:51Z","timestamp":1757618331021,"version":"3.44.0"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T00:00:00Z","timestamp":1750118400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T00:00:00Z","timestamp":1750118400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"Natural Science Foundation of Shanghai","award":["22ZR1443700"],"award-info":[{"award-number":["22ZR1443700"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s40747-025-01967-w","type":"journal-article","created":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T04:35:53Z","timestamp":1750134953000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["DepthRL: a weakly supervised approach for monocular depth estimation using deep reinforcement learning"],"prefix":"10.1007","volume":"11","author":[{"given":"Han","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3242-0857","authenticated-orcid":false,"given":"Yongxiong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jiayi","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jiapeng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhiqun","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Shuwen","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Shuai","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,6,17]]},"reference":[{"issue":"3","key":"1967_CR1","doi-asserted-by":"publisher","first-page":"1341","DOI":"10.1109\/TITS.2020.2972974","volume":"22","author":"D Feng","year":"2020","unstructured":"Feng D, Haase-Sch\u00fctz C, Rosenbaum L, Hertlein H, Glaeser C, Timm F, Wiesbeck W, Dietmayer K (2020) Deep multi-modal object detection and semantic segmentation for autonomous driving: Datasets, methods, and challenges. IEEE Trans Intell Transport Syst 22(3):1341\u20131360","journal-title":"IEEE Trans Intell Transport Syst"},{"key":"1967_CR2","doi-asserted-by":"crossref","unstructured":"Du R, Turner E, Dzitsiuk M, Prasso L, Duarte I, Dourgarian J, Afonso J, Pascoal J, Gladstone J, Cruces N et al (2020) Depthlab: Real-time 3d interaction with depth maps for mobile augmented reality. In: Proceedings of the 33rd Annual ACM Symposium on User Interface Software and Technology, pp. 829\u2013843","DOI":"10.1145\/3379337.3415881"},{"issue":"5","key":"1967_CR3","doi-asserted-by":"publisher","first-page":"1429","DOI":"10.1109\/TCYB.2013.2275291","volume":"43","author":"H Zhang","year":"2013","unstructured":"Zhang H, Reardon C, Parker LE (2013) Real-time multiple human perception with color-depth cameras on a mobile robot. 
IEEE Trans Cybern 43(5):1429\u20131441","journal-title":"IEEE Trans Cybern"},{"key":"1967_CR4","doi-asserted-by":"crossref","unstructured":"Awsafur Rahman M, Anowarul Fattah S (2023) Dwinformer: Dual window transformers for end-to-end monocular depth estimation. arXiv e-prints, 2303","DOI":"10.1109\/JSEN.2023.3299782"},{"issue":"23","key":"1967_CR5","doi-asserted-by":"publisher","first-page":"26912","DOI":"10.1109\/JSEN.2021.3120753","volume":"21","author":"Z Cheng","year":"2021","unstructured":"Cheng Z, Zhang Y, Tang C (2021) Swin-depth: using transformers and multi-scale fusion for monocular-based depth estimation. IEEE Sens J 21(23):26912\u201326920","journal-title":"IEEE Sens J"},{"issue":"4","key":"1967_CR6","doi-asserted-by":"publisher","first-page":"1738","DOI":"10.1109\/TPAMI.2020.3032602","volume":"44","author":"H Laga","year":"2020","unstructured":"Laga H, Jospin LV, Boussaid F, Bennamoun M (2020) A survey on deep learning techniques for stereo-based depth estimation. IEEE Trans Pattern Anal Mach Intell 44(4):1738\u20131764","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"11","key":"1967_CR7","doi-asserted-by":"publisher","first-page":"4381","DOI":"10.1109\/TCSVT.2021.3049869","volume":"31","author":"M Song","year":"2021","unstructured":"Song M, Lim S, Kim W (2021) Monocular depth estimation using Laplacian pyramid-based depth residuals. IEEE Trans Circuits Syst Video Technol 31(11):4381\u20134393","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"1967_CR8","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. In: Proceedings of the 28th International Conference on Neural Information Processing Systems. MIT Press, vol 2, issue 9, pp 2366\u20132374"},{"key":"1967_CR9","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part I 16, pp. 213\u2013229. Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1967_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104520","volume":"125","author":"B Wu","year":"2022","unstructured":"Wu B, Wang Y (2022) Rich global feature guided network for monocular depth estimation. Image Vis Comput 125:104520","journal-title":"Image Vis Comput"},{"key":"1967_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.109297","volume":"137","author":"R Li","year":"2023","unstructured":"Li R, Xue D, Su S, He X, Mao Q, Zhu Y, Sun J, Zhang Y (2023) Learning depth via leveraging semantics: self-supervised monocular depth estimation with both implicit and explicit semantic guidance. Pattern Recogn 137:109297","journal-title":"Pattern Recogn"},{"key":"1967_CR12","doi-asserted-by":"crossref","unstructured":"Zhang N, Nex F, Vosselman G, Kerle N (2023) Lite-mono: A lightweight cnn and transformer architecture for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18537\u201318546","DOI":"10.1109\/CVPR52729.2023.01778"},{"key":"1967_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110770","volume":"156","author":"D Zhang","year":"2024","unstructured":"Zhang D, Wang C, Wang H, Fu Q (2024) Graph semantic information for self-supervised monocular depth estimation. 
Pattern Recogn 156:110770","journal-title":"Pattern Recogn"},{"key":"1967_CR14","doi-asserted-by":"crossref","unstructured":"Liu J, Kong L, Li B, Wang Z, Gu H, Chen J (2024) Mono-vifi: A unified learning framework for self-supervised single and multi-frame monocular depth estimation. In: European Conference on Computer Vision, pp. 90\u2013107. Springer","DOI":"10.1007\/978-3-031-72995-9_6"},{"key":"1967_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2024.105360","volume":"154","author":"L Wu","year":"2025","unstructured":"Wu L, Wang L, Wei G, Yu Y (2025) Hpd-depth: high performance decoding network for self-supervised monocular depth estimation. Image Vis Comput 154:105360","journal-title":"Image Vis Comput"},{"key":"1967_CR16","doi-asserted-by":"crossref","unstructured":"Feng C, Zhang C, Chen Z, Hu W, Lu K, Ge L (2025) Self-supervised monocular depth estimation with dual-path encoders and offset field interpolation. IEEE Trans Image Process 34:939\u2013954","DOI":"10.1109\/TIP.2025.3533207"},{"key":"1967_CR17","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"4","key":"1967_CR18","doi-asserted-by":"publisher","first-page":"10969","DOI":"10.1109\/LRA.2022.3196781","volume":"7","author":"D Han","year":"2022","unstructured":"Han D, Shin J, Kim N, Hwang S, Choi Y (2022) Transdssl: transformer based depth estimation via self-supervised learning. IEEE Robot Autom Lett 7(4):10969\u201310976","journal-title":"IEEE Robot Autom Lett"},{"key":"1967_CR19","unstructured":"Lee JH, Han M-K, Ko DW, Suh IH (2019) From big to small: multi-scale local planar guidance for monocular depth estimation. arXiv preprint arXiv:1907.10326"},{"key":"1967_CR20","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp. 8759\u20138768","DOI":"10.1109\/CVPR.2018.00913"},{"key":"1967_CR21","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R, Le QV (2020) Efficientdet: scalable and efficient object detection. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp. 10781\u201310790","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"1967_CR22","doi-asserted-by":"crossref","unstructured":"Jahnavi T, Vasundhara D (2022) Segmentation of medical images using u-net++. In: 2022 4th International Conference on Advances in Computing, Communication Control and Networking (ICAC3N), pp. 801\u2013807. IEEE","DOI":"10.1109\/ICAC3N56670.2022.10074438"},{"key":"1967_CR23","doi-asserted-by":"crossref","unstructured":"Jaritz M, De Charette R, Wirbel E, Perrotton X, Nashashibi F (2018) Sparse and dense data with cnns: Depth completion and semantic segmentation. In: 2018 International Conference on 3D Vision (3DV), pp. 52\u201360. IEEE","DOI":"10.1109\/3DV.2018.00017"},{"key":"1967_CR24","doi-asserted-by":"crossref","unstructured":"Qiu J, Cui Z, Zhang Y, Zhang X, Liu S, Zeng B, Pollefeys M (2019) Deeplidar: deep surface normal guided depth prediction for outdoor scene from sparse lidar data and single color image. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp. 
3313\u20133322","DOI":"10.1109\/CVPR.2019.00343"},{"key":"1967_CR25","doi-asserted-by":"crossref","unstructured":"Li X, Zhao H, Han L, Tong Y, Tan S, Yang K (2020) Gated fully fusion for semantic segmentation. In: Proceedings of the AAAI Conference on artificial intelligence 34:11418\u201311425","DOI":"10.1609\/aaai.v34i07.6805"},{"key":"1967_CR26","doi-asserted-by":"publisher","first-page":"5264","DOI":"10.1109\/TIP.2021.3079821","volume":"30","author":"S Zhao","year":"2021","unstructured":"Zhao S, Gong M, Fu H, Tao D (2021) Adaptive context-aware multi-modal network for depth completion. IEEE Trans Image Process 30:5264\u20135276","journal-title":"IEEE Trans Image Process"},{"key":"1967_CR27","doi-asserted-by":"crossref","unstructured":"Bernstein AV, Burnaev E, Kachan ON (2018) Reinforcement learning for computer vision and robot navigation. In: Machine learning and data mining in pattern recognition: 14th International Conference, MLDM 2018, New York, NY, USA, July 15-19, 2018, Proceedings, Part II 14, pp. 258\u2013272 . Springer","DOI":"10.1007\/978-3-319-96133-0_20"},{"key":"1967_CR28","doi-asserted-by":"crossref","unstructured":"Rao K, Harris C, Irpan A, Levine S, Ibarz J, Khansari M (2020) Rl-cyclegan: Reinforcement learning aware simulation-to-real. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp. 11157\u201311166","DOI":"10.1109\/CVPR42600.2020.01117"},{"issue":"2","key":"1967_CR29","doi-asserted-by":"publisher","first-page":"1543","DOI":"10.1007\/s10462-022-10205-5","volume":"56","author":"V Uc-Cetina","year":"2023","unstructured":"Uc-Cetina V, Navarro-Guerrero N, Martin-Gonzalez A, Weber C, Wermter S (2023) Survey on reinforcement learning for language processing. Artif Intell Rev 56(2):1543\u20131575","journal-title":"Artif Intell Rev"},{"key":"1967_CR30","doi-asserted-by":"crossref","unstructured":"Lu J, Qin M, Tong Y (2022) Research on reinforcement learning algorithms in computer vision. In: 2022 International Symposium on Advances in Informatics, Electronics and Education (ISAIEE), pp. 246\u2013251. IEEE","DOI":"10.1109\/ISAIEE57420.2022.00057"},{"key":"1967_CR31","doi-asserted-by":"crossref","unstructured":"Le N, Rathour VS, Yamazaki K, Luu K, Savvides M (2022) Deep reinforcement learning in computer vision: a comprehensive survey. Artif Intell Rev. 55(4):1\u201387. Springer","DOI":"10.1007\/s10462-021-10061-9"},{"key":"1967_CR32","doi-asserted-by":"crossref","unstructured":"Fu H, Gong M, Wang C, Batmanghelich K, Tao D (2018) Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp. 2002\u20132011","DOI":"10.1109\/CVPR.2018.00214"},{"issue":"10","key":"1967_CR33","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu F, Shen C, Lin G, Reid I (2015) Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans Pattern Anal Mach Intell 38(10):2024\u20132039","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"3","key":"1967_CR34","doi-asserted-by":"publisher","first-page":"127","DOI":"10.36548\/jiip.2022.3.001","volume":"4","author":"SM Buddhacharya","year":"2022","unstructured":"Buddhacharya SM, Adhikari R, Maharjan N, Panday SP (2022) Monocular depth estimation using a multi-grid attention-based model. 
J Innov Image Processi 4(3):127\u2013146","journal-title":"J Innov Image Processi"},{"key":"1967_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110308","volume":"263","author":"W Yan","year":"2023","unstructured":"Yan W, Dong L, Ma W, Mi Q, Zha H (2023) Dsc-mde: dual structural contexts for monocular depth estimation. Knowl-Based Syst 263:110308","journal-title":"Knowl-Based Syst"},{"key":"1967_CR36","unstructured":"Gao F, Wang J, Yu J, Wang Y, Shuang F (2021) A weakly-supervised depth estimation network using attention mechanism. arXiv preprint arXiv:2107.04819"},{"issue":"4","key":"1967_CR37","doi-asserted-by":"publisher","first-page":"2396","DOI":"10.1109\/TPAMI.2023.3330944","volume":"46","author":"V Arampatzakis","year":"2023","unstructured":"Arampatzakis V, Pavlidis G, Mitianoudis N, Papamarkos N (2023) Monocular depth estimation: a thorough review. IEEE Trans Pattern Anal Mach Intell 46(4):2396\u20132414","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"5","key":"1967_CR38","doi-asserted-by":"publisher","first-page":"053013","DOI":"10.1117\/1.JEI.29.5.053013","volume":"29","author":"Z Zhang","year":"2020","unstructured":"Zhang Z, Qiao J, Lin S, Liu H (2020) Weakly supervised monocular depth estimation method based on stereo matching labels. J Electron Imaging 29(5):053013\u2013053013","journal-title":"J Electron Imaging"},{"key":"1967_CR39","doi-asserted-by":"crossref","unstructured":"Ren H, Raj A, El-Khamy M, Lee J (2020) Suw-learn: joint supervised, unsupervised, weakly supervised deep learning for monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 750\u2013751","DOI":"10.1109\/CVPRW50498.2020.00383"},{"issue":"8","key":"1967_CR40","doi-asserted-by":"publisher","first-page":"2272","DOI":"10.3390\/s20082272","volume":"20","author":"F Khan","year":"2020","unstructured":"Khan F, Salahuddin S, Javidnia H (2020) Deep learning-based monocular depth estimation methods\u2013a state-of-the-art review. Sensors 20(8):2272","journal-title":"Sensors"},{"key":"1967_CR41","doi-asserted-by":"crossref","unstructured":"Pan L, Hartley R, Liu L, Xu Z, Chowdhury S, Yang Y, Zhang H, Li H, Liu M (2024) Weakly-supervised depth estimation and image deblurring via dual-pixel sensors. IEEE Trans Pattern Anal Mach Intell 46(12):11314\u201311330. IEEE","DOI":"10.1109\/TPAMI.2024.3458974"},{"key":"1967_CR42","unstructured":"Xu J, Bai Y, Liu X, Jiang J, Ji X (2021) Weakly-supervised monocular depth estimation with resolution-mismatched data. arXiv preprint arXiv:2109.11573"},{"key":"1967_CR43","doi-asserted-by":"crossref","unstructured":"Li B, Shen C, Dai Y, Van Den Hengel A, He M (2015) Depth and surface normal estimation from monocular images using regression on deep features and hierarchical crfs. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp. 1119\u20131127","DOI":"10.1109\/CVPR.2015.7298715"},{"issue":"7","key":"1967_CR44","doi-asserted-by":"publisher","first-page":"4841","DOI":"10.1109\/TCSVT.2021.3128505","volume":"32","author":"X Meng","year":"2021","unstructured":"Meng X, Fan C, Ming Y, Yu H (2021) Cornet: context-based ordinal regression network for monocular depth estimation. 
IEEE Trans Circuits Syst Video Technol 32(7):4841\u20134853","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"1967_CR45","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2024.105308","volume":"152","author":"H Khan","year":"2024","unstructured":"Khan H, Usman MT, Rida I, Koo J (2024) Attention enhanced machine instinctive vision with human-inspired saliency detection. Image Vis Comput 152:105308","journal-title":"Image Vis Comput"},{"key":"1967_CR46","doi-asserted-by":"crossref","unstructured":"Shi W, Caballero J, Husz\u00e1r F, Totz J, Aitken AP, Bishop R, Rueckert D, Wang Z (2016) Real-time single image and video super-resolution using an efficient sub-pixel convolutional neural network. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp. 1874\u20131883","DOI":"10.1109\/CVPR.2016.207"},{"key":"1967_CR47","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2024.105195","volume":"149","author":"H Khan","year":"2024","unstructured":"Khan H, Ullah I, Shabaz M, Omer MF, Usman MT, Guellil MS, Koo J (2024) Visionary vigilance: optimized yolov8 for fallen person detection with large-scale benchmark dataset. Image Vis Comput 149:105195","journal-title":"Image Vis Comput"},{"key":"1967_CR48","unstructured":"Usman MT, Khan H, Singh SK, Lee MY, Koo J (2024) Efficient deepfake detection via layer-frozen assisted dual attention network for consumer imaging devices. IEEE Trans Consum Electron"},{"key":"1967_CR49","doi-asserted-by":"crossref","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D, Lu T, Luo P, Shao L (2021) Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on computer vision, pp. 568\u2013578","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"1967_CR50","doi-asserted-by":"crossref","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? the Kitti vision benchmark suite. In: 2012 IEEE Conference on computer vision and pattern recognition, pp. 3354\u20133361. IEEE","DOI":"10.1109\/CVPR.2012.6248074"},{"issue":"7576","key":"1967_CR51","first-page":"746","volume":"5","author":"N Silberman","year":"2012","unstructured":"Silberman N, Hoiem D, Kohli P, Fergus R (2012) Indoor segmentation and support inference from rgbd images. ECCV 5(7576):746\u2013760","journal-title":"ECCV"},{"key":"1967_CR52","doi-asserted-by":"crossref","unstructured":"Garg R, Bg VK, Carneiro G, Reid I (2016) Unsupervised cnn for single view depth estimation: geometry to the rescue. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part VIII 14, pp. 740\u2013756. Springer","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1967_CR53","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L, Desmaison A, Kopf A, Yang E, DeVito Z, Raison M, Tejani A, Chilamkurthy S, Steiner B, Fang, Bai J, Chintala S (2019) Pytorch: an imperative style, high-performance deep learning library. In: Proceedings of the 33rd International Conference on Neural Information Processing Systems, vol 32, pp 8024\u20138035"},{"key":"1967_CR54","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie E, Wang W, Yu Z, Anandkumar A, Alvarez JM, Luo P (2021) Segformer: simple and efficient design for semantic segmentation with transformers. 
Adv Neural Inf Process Syst 34:12077\u201312090","journal-title":"Adv Neural Inf Process Syst"},{"key":"1967_CR55","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"},{"key":"1967_CR56","unstructured":"Bhat SF, Alhashim I, Wonka P (2021) Adabins: depth estimation using adaptive bins. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition, pp. 4009\u20134018"},{"key":"1967_CR57","doi-asserted-by":"crossref","unstructured":"Agarwal A, Arora C (2022) Depthformer: multiscale vision transformer for monocular depth estimation with global local information fusion. In: 2022 IEEE International Conference on Image Processing (ICIP), pp. 3873\u20133877 . IEEE","DOI":"10.1109\/ICIP46576.2022.9897187"},{"key":"1967_CR58","doi-asserted-by":"crossref","unstructured":"Yuan W, Gu X, Dai Z, Zhu S, Tan P (2022) New crfs: neural window fully-connected crfs for monocular depth estimation. arXiv preprint arXiv:2203.01502","DOI":"10.1109\/CVPR52688.2022.00389"},{"key":"1967_CR59","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2023.103753","volume":"90","author":"Q Wang","year":"2023","unstructured":"Wang Q, Piao Y (2023) Depth estimation of supervised monocular images based on semantic segmentation. J Vis Commun Image Represent 90:103753","journal-title":"J Vis Commun Image Represent"},{"key":"1967_CR60","doi-asserted-by":"crossref","unstructured":"Li Z, Chen Z, Liu X, Jiang J (2023) Depthformer: exploiting long-range correlation and local information for accurate monocular depth estimation. Mach Intell Res 20(6):1\u201318. Springer","DOI":"10.1007\/s11633-023-1458-0"},{"key":"1967_CR61","doi-asserted-by":"crossref","unstructured":"Agarwal A, Arora C (2023) Attention attention everywhere: monocular depth prediction with skip attention. In: Proceedings of the IEEE\/CVF Winter Conference on applications of computer vision, pp. 5861\u20135870","DOI":"10.1109\/WACV56688.2023.00581"},{"issue":"4","key":"1967_CR62","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1007\/s00138-024-01560-0","volume":"35","author":"H Chen","year":"2024","unstructured":"Chen H, Wang Y (2024) Chfnet: a coarse-to-fine hierarchical refinement model for monocular depth estimation. Mach Vis Appl 35(4):78","journal-title":"Mach Vis Appl"},{"key":"1967_CR63","doi-asserted-by":"crossref","unstructured":"Li G, Zhi Z, Ling BW-K (2024) Globaldepth: a global information aggregation network for depth estimation. IEEE Trans Circuits Syst II Express Briefs 71(6):3201\u20133205. IEEE","DOI":"10.1109\/TCSII.2024.3354068"},{"key":"1967_CR64","doi-asserted-by":"publisher","DOI":"10.1016\/j.swevo.2024.101837","volume":"93","author":"Z Yu","year":"2025","unstructured":"Yu Z, Zhang H, Liu R, Dai S, Chen X, Sheng W, Jin Y (2025) Mde-evonas: automatic network architecture design for monocular depth estimation via evolutionary neural architecture search. Swarm Evol Comput 93:101837","journal-title":"Swarm Evol Comput"},{"key":"1967_CR65","doi-asserted-by":"crossref","unstructured":"Kuznietsov Y, Stuckler J, Leibe B (2017) Semi-supervised deep learning for monocular depth map prediction. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp. 6647\u20136655","DOI":"10.1109\/CVPR.2017.238"},{"key":"1967_CR66","doi-asserted-by":"crossref","unstructured":"Godard C, Mac Aodha O, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. 
In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp. 270\u2013279","DOI":"10.1109\/CVPR.2017.699"},{"key":"1967_CR67","unstructured":"Oktay O, Schlemper J, Folgoc LL, Lee M, Heinrich M, Misawa K, Mori K, McDonagh S, Hammerla NY, Kainz B, et al (2018) Attention u-net: learning where to look for the pancreas. arXiv preprint arXiv:1804.03999"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01967-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-025-01967-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-025-01967-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T21:12:50Z","timestamp":1757193170000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-025-01967-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,17]]},"references-count":67,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["1967"],"URL":"https:\/\/doi.org\/10.1007\/s40747-025-01967-w","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"type":"print","value":"2199-4536"},{"type":"electronic","value":"2198-6053"}],"subject":[],"published":{"date-parts":[[2025,6,17]]},"assertion":[{"value":"20 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 May 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 June 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"No authors have relevant financial or non-financial interests to disclose. The authors have no relevant financial or non-financial interests to disclose. On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"All authors volunteer to participate in this study. All authors have understood the purpose and process of the study. All authors read and approved the final manuscript.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"All authors consent to the use of non-identifying information provided in this study for scholarly publications and presentations.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"344"}}
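A record like the one above can be retrieved programmatically: Crossref's public REST API serves the same {"status", "message-type", "message"} envelope from its works endpoint, https://api.crossref.org/works/{DOI}. Below is a minimal Python sketch under that assumption (network access required; the mailto= parameter is Crossref's polite-pool convention and the address is a placeholder). The fields it prints are exactly the ones present in this record.

```python
import json
import urllib.request

# DOI of the record above; any Crossref-registered DOI works here.
DOI = "10.1007/s40747-025-01967-w"

# Crossref's public works endpoint; replace the mailto address with your own
# so requests are routed through the polite pool.
url = f"https://api.crossref.org/works/{DOI}?mailto=you@example.org"

with urllib.request.urlopen(url) as resp:
    record = json.load(resp)

# The bibliographic payload lives under the "message" key of the envelope.
work = record["message"]
authors = ", ".join(f'{a["given"]} {a["family"]}' for a in work["author"])
print(work["title"][0])
print(authors)
print(f'{work["container-title"][0]} {work["volume"]}({work.get("issue", "?")})')
print("DOI:", work["DOI"], "| references:", work["reference-count"])
```

Note that "title" and "container-title" are arrays in the Crossref schema (hence the [0] indexing), and fields like "issue" are optional, which is why the sketch uses .get() rather than direct indexing there.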