{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:52:19Z","timestamp":1759333939384,"version":"build-2065373602"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"29","license":[{"start":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T00:00:00Z","timestamp":1756857600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T00:00:00Z","timestamp":1756857600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Yokogawa Saudi Arabia","award":["COE 02538"],"award-info":[{"award-number":["COE 02538"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s00521-025-11566-y","type":"journal-article","created":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T16:02:22Z","timestamp":1756915342000},"page":"24265-24280","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing monocular depth estimation with an advanced encoder-decoder architecture"],"prefix":"10.1007","volume":"37","author":[{"given":"Yasser","family":"El-Alfy","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1507-5713","authenticated-orcid":false,"given":"Uthman","family":"Baroudi","sequence":"additional","affiliation":[]},{"given":"Hamzah","family":"Luqman","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,3]]},"reference":[{"key":"11566_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2022.103441","volume":"123","author":"A Mertan","year":"2022","unstructured":"Mertan A, Duff DJ, Unal G (2022) Single image depth estimation: an overview. Digit Signal Process 123:103441","journal-title":"Digit Signal Process"},{"key":"11566_CR2","doi-asserted-by":"crossref","unstructured":"Huynh L, Nguyen-Ha P, Matas J, Rahtu E, Heikkil\u00e4 J (2020) Guiding monocular depth estimation using depth-attention volume. In: European Conference on Computer Vision, pp. 581\u2013597. Springer","DOI":"10.1007\/978-3-030-58574-7_35"},{"key":"11566_CR3","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.neucom.2020.12.089","volume":"438","author":"Y Ming","year":"2021","unstructured":"Ming Y, Meng X, Fan C, Yu H (2021) Deep learning for monocular depth estimation: a review. Neurocomputing 438:14\u201333","journal-title":"Neurocomputing"},{"key":"11566_CR4","first-page":"126","volume":"11","author":"W Lee","year":"2011","unstructured":"Lee W, Park N, Woo W (2011) Depth-assisted real-time 3d object detection for augmented reality. ICAT 11:126\u2013132","journal-title":"ICAT"},{"key":"11566_CR5","doi-asserted-by":"crossref","unstructured":"Hazirbas C, Ma L, Domokos C, Cremers D (2016) Fusenet: Incorporating depth into semantic segmentation via fusion-based cnn architecture. In: Asian Conference on Computer Vision, pp. 213\u2013228. Springer","DOI":"10.1007\/978-3-319-54181-5_14"},{"key":"11566_CR6","doi-asserted-by":"publisher","first-page":"1862","DOI":"10.4028\/www.scientific.net\/AMM.284-287.1862","volume":"284","author":"KY Chen","year":"2013","unstructured":"Chen KY, Chien CC, Tseng CT (2013) Improving the accuracy of depth estimation in binocular vision for robotic applications. Appl Mech Mater 284:1862\u20131866","journal-title":"Appl Mech Mater"},{"key":"11566_CR7","doi-asserted-by":"crossref","unstructured":"Xue F, Zhuo G, Huang Z, Fu W, Wu Z, Ang MH (2020) Toward hierarchical self-supervised monocular absolute depth estimation for autonomous driving applications. In: 2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 2330\u20132337. IEEE","DOI":"10.1109\/IROS45743.2020.9340802"},{"key":"11566_CR8","doi-asserted-by":"crossref","unstructured":"Forouher D, Besselmann MG, Maehle E (2016) Sensor fusion of depth camera and ultrasound data for obstacle detection and robot navigation. In: 2016 14th International Conference on Control, Automation, Robotics and Vision (ICARCV), pp. 1\u20136. IEEE","DOI":"10.1109\/ICARCV.2016.7838832"},{"key":"11566_CR9","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3160741","author":"X Dong","year":"2022","unstructured":"Dong X, Garratt MA, Anavatti SG, Abbass HA (2022) Towards real-time monocular depth estimation for robotics: a survey. IEEE Trans Intell Transp Syst. https:\/\/doi.org\/10.1109\/TITS.2022.3160741","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"11566_CR10","unstructured":"Kang SB, Webb J, Zitnick CL, Kanade T (1994) An active multibaseline stereo system with real-time image acquisition. Technical report, CARNEGIE-MELLON UNIV PITTSBURGH PA SCHOOL OF COMPUTER SCIENCE"},{"key":"11566_CR11","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1016\/j.media.2018.06.005","volume":"48","author":"F Mahmood","year":"2018","unstructured":"Mahmood F, Durr NJ (2018) Deep learning and conditional random fields-based depth estimation and topographical reconstruction from conventional endoscopy. Med Image Anal 48:230\u2013243","journal-title":"Med Image Anal"},{"issue":"5","key":"11566_CR12","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2008.132","volume":"31","author":"A Saxena","year":"2008","unstructured":"Saxena A, Sun M, Ng AY (2008) Make3d: learning 3d scene structure from a single still image. IEEE Trans Pattern Anal Mach Intell 31(5):824\u2013840","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11566_CR13","doi-asserted-by":"crossref","unstructured":"Gao Y, Liu W, Yang P, Xu B (2012) Depth estimation based on adaptive support weight and sift for multi-lenslet cameras. In: 6th International Symposium on Advanced Optical Manufacturing and Testing Technologies: Optoelectronic Materials and Devices for Sensing, Imaging, and Solar Energy, vol. 8419, pp. 63\u201366. SPIE","DOI":"10.1117\/12.975694"},{"key":"11566_CR14","doi-asserted-by":"crossref","unstructured":"Liaquat S, Khan US, et al. (2015) Object detection and depth estimation of real world objects using single camera. In: 2015 Fourth International Conference on Aerospace Science and Engineering (ICASE), pp. 1\u20134. IEEE","DOI":"10.1109\/ICASE.2015.7489526"},{"key":"11566_CR15","doi-asserted-by":"crossref","unstructured":"Ranftl R, Bochkovskiy A, Koltun V (2021) Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12179\u201312188","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"11566_CR16","doi-asserted-by":"crossref","unstructured":"Zioulis N, Karakottas A, Zarpalas D, Daras P (2018) Omnidepth: Dense depth estimation for indoors spherical panoramas. In: Proceedings of the European Conference on Computer Vision (ECCV)","DOI":"10.1007\/978-3-030-01231-1_28"},{"key":"11566_CR17","volume":"7","author":"R Ranftl","year":"2020","unstructured":"Ranftl R, Lasinger K, Hafner D, Schindler K, Koltun V (2020) Towards robust monocular depth estimation: mixing datasets for zero-shot cross-dataset transfer. IEEE Trans Pattern Anal Mach Intell 7:9","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11566_CR18","first-page":"9","volume":"27","author":"D Eigen","year":"2014","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. Adv Neural Inf Process Syst 27:9","journal-title":"Adv Neural Inf Process Syst"},{"key":"11566_CR19","doi-asserted-by":"crossref","unstructured":"Fu H, Gong M, Wang C, Batmanghelich K, Tao D (2018) Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2002\u20132011","DOI":"10.1109\/CVPR.2018.00214"},{"key":"11566_CR20","doi-asserted-by":"crossref","unstructured":"Xu D, Wang W, Tang H, Liu H, Sebe N, Ricci E (2018) Structured attention guided convolutional neural fields for monocular depth estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3917\u20133925","DOI":"10.1109\/CVPR.2018.00412"},{"issue":"6","key":"11566_CR21","first-page":"1426","volume":"41","author":"E Ricci","year":"2018","unstructured":"Ricci E, Ouyang W, Wang X, Sebe N et al (2018) Monocular depth estimation using multi-scale continuous crfs as sequential deep networks. IEEE Trans Pattern Anal Mach Intell 41(6):1426\u20131440","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"19","key":"11566_CR22","doi-asserted-by":"publisher","first-page":"16423","DOI":"10.1007\/s00521-022-07663-x","volume":"34","author":"S Abdulwahab","year":"2022","unstructured":"Abdulwahab S, Rashwan HA, Garcia MA, Masoumian A, Puig D (2022) Monocular depth map estimation based on a multi-scale deep architecture and curvilinear saliency feature boosting. Neural Comput Appl 34(19):16423\u201316440","journal-title":"Neural Comput Appl"},{"key":"11566_CR23","doi-asserted-by":"publisher","first-page":"11217","DOI":"10.1007\/s00521-020-04702-3","volume":"32","author":"Y Ding","year":"2020","unstructured":"Ding Y, Lin L, Wang L, Zhang M, Li D (2020) Digging into the multi-scale structure for a more refined depth map and 3d reconstruction. Neural Comput Appl 32:11217\u201311228","journal-title":"Neural Comput Appl"},{"key":"11566_CR24","unstructured":"Alhashim I, Wonka P (2018) High quality monocular depth estimation via transfer learning. arXiv e-prints arXiv:abs\/1812.11941"},{"key":"11566_CR25","doi-asserted-by":"crossref","unstructured":"Adz-Dzikri AA, Virgono A, Dirgantara FM (2021) Advance driving assistance systems: Object detection and distance estimation using deep learning. In: 2021 8th International Conference on Electrical Engineering, Computer Science and Informatics (EECSI), pp. 381\u2013386. IEEE","DOI":"10.23919\/EECSI53397.2021.9624218"},{"key":"11566_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109982","volume":"145","author":"S Tang","year":"2024","unstructured":"Tang S, Lu T, Liu X, Zhou H, Zhang Y (2024) Catnet: convolutional attention and transformer for monocular depth estimation. Pattern Recogn 145:109982","journal-title":"Pattern Recogn"},{"key":"11566_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107189","volume":"127","author":"MM Haji-Esmaeili","year":"2024","unstructured":"Haji-Esmaeili MM, Montazer G (2024) Large-scale monocular depth estimation in the wild. Eng Appl Artif Intell 127:107189","journal-title":"Eng Appl Artif Intell"},{"key":"11566_CR28","doi-asserted-by":"crossref","unstructured":"Patni S, Agarwal A, Arora C (2024) Ecodepth: Effective conditioning of diffusion models for monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 28285\u201328295","DOI":"10.1109\/CVPR52733.2024.02672"},{"key":"11566_CR29","doi-asserted-by":"crossref","unstructured":"Yin Z, Shi J (2018) Geonet: Unsupervised learning of dense depth, optical flow and camera pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","DOI":"10.1109\/CVPR.2018.00212"},{"key":"11566_CR30","doi-asserted-by":"crossref","unstructured":"Godard C, Mac\u00a0Aodha O, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 270\u2013279","DOI":"10.1109\/CVPR.2017.699"},{"key":"11566_CR31","doi-asserted-by":"crossref","unstructured":"Yang Y, Wong A, Soatto S (2019) Dense depth posterior (ddp) from single image and sparse range. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3353\u20133362","DOI":"10.1109\/CVPR.2019.00347"},{"key":"11566_CR32","doi-asserted-by":"crossref","unstructured":"Marsal R, Chabot F, Loesch A, Grolleau W, Sahbi H (2024) Monoprob: Self-supervised monocular depth estimation with interpretable uncertainty. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3637\u20133646","DOI":"10.1109\/WACV57701.2024.00360"},{"key":"11566_CR33","doi-asserted-by":"publisher","first-page":"8031","DOI":"10.1007\/s00521-020-05545-8","volume":"33","author":"X Li","year":"2021","unstructured":"Li X, Hou Y, Wang P, Gao Z, Xu M, Li W (2021) Transformer guided geometry model for flow-based unsupervised visual odometry. Neural Comput Appl 33:8031\u20138042","journal-title":"Neural Comput Appl"},{"key":"11566_CR34","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"issue":"4","key":"11566_CR35","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR, Simoncelli EP (2004) Image quality assessment: from error visibility to structural similarity. IEEE Trans Image Process 13(4):600\u2013612","journal-title":"IEEE Trans Image Process"},{"key":"11566_CR36","doi-asserted-by":"crossref","unstructured":"Silberman N, Hoiem D, Kohli P, Fergus R (2012) Indoor segmentation and support inference from rgbd images. In: European Conference on Computer Vision, pp. 746\u2013760. Springer","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"11566_CR37","doi-asserted-by":"crossref","unstructured":"Levin A, Lischinski D, Weiss Y (2004) Colorization using optimization. In: ACM SIGGRAPH 2004 Papers, pp. 689\u2013694","DOI":"10.1145\/1186562.1015780"},{"key":"11566_CR38","doi-asserted-by":"crossref","unstructured":"Laina I, Rupprecht C, Belagiannis V, Tombari F, Navab N (2016) Deeper depth prediction with fully convolutional residual networks. In: 2016 Fourth International Conference on 3D Vision (3DV), pp. 239\u2013248. IEEE","DOI":"10.1109\/3DV.2016.32"},{"key":"11566_CR39","doi-asserted-by":"crossref","unstructured":"Xu D, Ricci E, Ouyang W, Wang X, Sebe N (2017) Multi-scale continuous crfs as sequential deep networks for monocular depth estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5354\u20135362","DOI":"10.1109\/CVPR.2017.25"},{"key":"11566_CR40","doi-asserted-by":"crossref","unstructured":"Hao Z, Li Y, You S, Lu F (2018) Detail preserving depth estimation from a single image using attention guided networks. In: 2018 International Conference on 3D Vision (3DV), pp. 304\u2013313. IEEE","DOI":"10.1109\/3DV.2018.00043"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11566-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11566-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11566-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T05:22:54Z","timestamp":1759209774000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11566-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,3]]},"references-count":40,"journal-issue":{"issue":"29","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["11566"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11566-y","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2025,9,3]]},"assertion":[{"value":"16 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 September 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare neither conflict of interest nor conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}