{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T13:25:56Z","timestamp":1776777956618,"version":"3.51.2"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2020,2,3]],"date-time":"2020-02-03T00:00:00Z","timestamp":1580688000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,2,3]],"date-time":"2020-02-03T00:00:00Z","timestamp":1580688000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Zhejiang Provincial Science Foundation of China","award":["LY18F010004"],"award-info":[{"award-number":["LY18F010004"]}]},{"name":"Major Scientific Project of Zhejiang Lab","award":["2018DD0ZX01"],"award-info":[{"award-number":["2018DD0ZX01"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2020,8]]},"DOI":"10.1007\/s00521-020-04702-3","type":"journal-article","created":{"date-parts":[[2020,2,3]],"date-time":"2020-02-03T20:03:41Z","timestamp":1580760221000},"page":"11217-11228","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Digging into the multi-scale structure for a more refined depth map and 3D reconstruction"],"prefix":"10.1007","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9375-9400","authenticated-orcid":false,"given":"Yinzhang","family":"Ding","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lu","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lianghao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongxiao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,2,3]]},"reference":[{"issue":"3","key":"4702_CR1","doi-asserted-by":"publisher","first-page":"1281","DOI":"10.1007\/s11063-018-9781-0","volume":"48","author":"C Li","year":"2018","unstructured":"Li C, Lu B, Zhang Y et al (2018) 3D reconstruction of indoor scenes via image registration. Neural Process Lett 48(3):1281\u20131304","journal-title":"Neural Process Lett"},{"issue":"3","key":"4702_CR2","doi-asserted-by":"publisher","first-page":"735","DOI":"10.1007\/s00521-018-03971-3","volume":"32","author":"S Dong","year":"2020","unstructured":"Dong S, Gao Z, Pirbhulal S et al (2020) IoT-based 3D convolution for video salient object detection. Neural Comput Appl 32(3):735\u2013746","journal-title":"Neural Comput Appl"},{"key":"4702_CR3","first-page":"1","volume":"2019","author":"J Li","year":"2019","unstructured":"Li J, Zhang Y, Chen Z et al  (2019) A novel edge-enabled slam solution using projected depth image information. Neural Comput Appl 2019:1\u201313","journal-title":"Neural Comput Appl"},{"issue":"1","key":"4702_CR4","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1007\/s11263-005-4839-7","volume":"68","author":"R Vidal","year":"2006","unstructured":"Vidal R, Ma Y, Soatto S, Sastry S (2006) Two-view multibody structure from motion. Int J Comput Vis 68(1):7\u201325","journal-title":"Int J Comput Vis"},{"key":"4702_CR5","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: convolutional networks for biomedical image segmentation. In: International conference on medical image computing and computer-assisted intervention. Springer, pp 234\u2013241","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"4702_CR6","doi-asserted-by":"crossref","unstructured":"Yu F, Koltun V, Funkhouser T (2017) Dilated residual networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 472\u2013480","DOI":"10.1109\/CVPR.2017.75"},{"key":"4702_CR7","unstructured":"Yu F, Koltun V (2015) Multi-scale context aggregation by dilated convolutions. arXiv:1511.07122"},{"key":"4702_CR8","unstructured":"Kendall A, Gal Y (2017) What uncertainties do we need in Bayesian deep learning for computer vision? In: Advances in neural information processing systems, pp 5574\u20135584"},{"key":"4702_CR9","doi-asserted-by":"crossref","unstructured":"Tateno K, Tombari F, Laina I, Navab N (2017) Cnn-slam: real-time dense monocular slam with learned depth prediction. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), vol\u00a02","DOI":"10.1109\/CVPR.2017.695"},{"issue":"5","key":"4702_CR10","doi-asserted-by":"publisher","first-page":"1255","DOI":"10.1109\/TRO.2017.2705103","volume":"33","author":"R Mur-Artal","year":"2017","unstructured":"Mur-Artal R, Tard\u00f3s JD (2017) Orb-slam2: an open-source slam system for monocular, stereo, and rgb-d cameras. IEEE Trans Robot 33(5):1255\u20131262","journal-title":"IEEE Trans Robot"},{"key":"4702_CR11","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. In: Advances in neural information processing systems, pp 2366\u20132374"},{"key":"4702_CR12","doi-asserted-by":"crossref","unstructured":"Eigen D, Fergus R (2015) Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. In: Proceedings of the IEEE international conference on computer vision, pp 2650\u20132658","DOI":"10.1109\/ICCV.2015.304"},{"issue":"10","key":"4702_CR13","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2016","unstructured":"Liu F, Shen C, Lin G, Reid ID (2016) Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans Pattern Anal Mach Intell 38(10):2024\u20132039","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4702_CR14","doi-asserted-by":"crossref","unstructured":"Li B, Shen C, Dai Y, Van Den\u00a0Hengel A, He M (2015) Depth and surface normal estimation from monocular images using regression on deep features and hierarchical crfs. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1119\u20131127","DOI":"10.1109\/CVPR.2015.7298715"},{"key":"4702_CR15","doi-asserted-by":"crossref","unstructured":"Wang P, Shen X, Lin Z, Cohen S, Price B, Yuille AL (2015) Towards unified depth and semantic prediction from a single image. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2800\u20132809","DOI":"10.1109\/CVPR.2015.7298897"},{"key":"4702_CR16","doi-asserted-by":"crossref","unstructured":"Laina I, Rupprecht C, Belagiannis V, Tombari F, Navab N (2016) Deeper depth prediction with fully convolutional residual networks. In: 2016 fourth international conference on 3D vision (3DV). IEEE, pp 239\u2013248","DOI":"10.1109\/3DV.2016.32"},{"key":"4702_CR17","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"4702_CR18","doi-asserted-by":"crossref","unstructured":"Garg R, Vijay Kumar BG, Carneiro G, Reid I (2016) Unsupervised cnn for single view depth estimation: Geometry to the rescue. In: European conference on computer vision. Springer, pp 740\u2013756","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"4702_CR19","doi-asserted-by":"crossref","unstructured":"Godard C, Mac\u00a0Aodha O, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR). IEEE, pp 6602\u20136611","DOI":"10.1109\/CVPR.2017.699"},{"key":"4702_CR20","unstructured":"Vijayanarasimhan S, Ricco S, Schmid C, Sukthankar R, Fragkiadaki K (2017) Sfm-net: learning of structure and motion from video. arXiv:1704.07804"},{"key":"4702_CR21","doi-asserted-by":"crossref","unstructured":"Wang C, Miguel\u00a0Buenaposada J, Zhu R, Lucey S (2018) Learning depth from monocular videos using direct methods. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2022\u20132030","DOI":"10.1109\/CVPR.2018.00216"},{"key":"4702_CR22","doi-asserted-by":"crossref","unstructured":"Poggi M, Tosi F, Mattoccia S (2018) Learning monocular depth estimation with unsupervised trinocular assumptions. In: 2018 international conference on 3D vision (3DV). IEEE, pp 324\u2013333","DOI":"10.1109\/3DV.2018.00045"},{"issue":"6","key":"4702_CR23","first-page":"7","volume":"2","author":"T Zhou","year":"2017","unstructured":"Zhou T, Brown M, Snavely N, Lowe DG (2017) Unsupervised learning of depth and ego-motion from video. CVPR 2(6):7","journal-title":"CVPR"},{"key":"4702_CR24","doi-asserted-by":"crossref","unstructured":"Repala VK, Dubey SR (2018) Dual cnn models for unsupervised monocular depth estimation. arXiv:1804.06324","DOI":"10.1007\/978-3-030-34869-4_23"},{"issue":"11","key":"4702_CR25","doi-asserted-by":"publisher","first-page":"3174","DOI":"10.1109\/TCSVT.2017.2740321","volume":"28","author":"Y Cao","year":"2018","unstructured":"Cao Y, Wu Z, Shen C (2018) Estimating depth from monocular images as classification using deep fully convolutional residual networks. IEEE Trans Circuits Syst Video Technol 28(11):3174\u20133182","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"4702_CR26","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1016\/j.patcog.2018.05.029","volume":"83","author":"B Li","year":"2018","unstructured":"Li B, Dai Y, He M (2018) Monocular depth estimation with hierarchical fusion of dilated cnns and soft-weighted-sum inference. Pattern Recognit 83:328\u2013339","journal-title":"Pattern Recognit"},{"key":"4702_CR27","doi-asserted-by":"crossref","unstructured":"Li R, Xian K, Shen C, Cao Z, Lu H, Hang L (2018) Deep attention-based classification network for robust depth prediction. arXiv:1807.03959","DOI":"10.1007\/978-3-030-20870-7_41"},{"key":"4702_CR28","doi-asserted-by":"crossref","unstructured":"Fu H, Gong M, Wang C, Batmanghelich K, Tao D (2018) Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2002\u20132011","DOI":"10.1109\/CVPR.2018.00214"},{"key":"4702_CR29","doi-asserted-by":"crossref","unstructured":"Mahjourian R, Wicke M, Angelova A (2018) Unsupervised learning of depth and ego-motion from monocular video using 3d geometric constraints. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5667\u20135675","DOI":"10.1109\/CVPR.2018.00594"},{"key":"4702_CR30","doi-asserted-by":"crossref","unstructured":"Yin Z, Shi J (2018) Geonet: unsupervised learning of dense depth, optical flow and camera pose. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), vol\u00a02","DOI":"10.1109\/CVPR.2018.00212"},{"key":"4702_CR31","doi-asserted-by":"crossref","unstructured":"Zou Y, Luo Z, Huang J-B (2018) Df-net: unsupervised joint learning of depth and flow using cross-task consistency. In: Proceedings of the European conference on computer vision (ECCV), pp 36\u201353","DOI":"10.1007\/978-3-030-01228-1_3"},{"key":"4702_CR32","doi-asserted-by":"crossref","unstructured":"Jiao J, Cao Y, Song Y, Lau R (2018) Look deeper into depth: monocular depth estimation with semantic booster and attention-driven loss. In: Proceedings of the European conference on computer vision (ECCV), pp 53\u201369","DOI":"10.1007\/978-3-030-01267-0_4"},{"key":"4702_CR33","doi-asserted-by":"crossref","unstructured":"Zhang Z, Cui Z, Xu C, Jie Z, Li X, Yang J (2018) Joint task-recursive learning for semantic segmentation and depth estimation,. In: European conference on computer vision. Springer, pp 238\u2013255","DOI":"10.1007\/978-3-030-01249-6_15"},{"key":"4702_CR34","unstructured":"Babu V, Majumder A, Das K, Kumar S et\u00a0al (2018) A deeper insight into the undemon: unsupervised deep network for depth and ego-motion estimation. arXiv:1809.00969"},{"key":"4702_CR35","doi-asserted-by":"crossref","unstructured":"Tong T, Li G, Liu X, Gao Q (2017) Image super-resolution using dense skip connections. In: 2017 IEEE international conference on computer vision (ICCV). IEEE, pp 4809\u20134817","DOI":"10.1109\/ICCV.2017.514"},{"key":"4702_CR36","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1016\/j.neucom.2018.09.061","volume":"322","author":"Y Ji","year":"2018","unstructured":"Ji Y, Zhang H, Wu QJ (2018) Salient object detection via multi-scale attention cnn. Neurocomputing 322:130\u2013140","journal-title":"Neurocomputing"},{"key":"4702_CR37","doi-asserted-by":"crossref","unstructured":"Chen L-C, Zhu Y, Papandreou G, Schroff F, Adam H (2018) Encoder\u2013decoder with atrous separable convolution for semantic image segmentation. In: The European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"4702_CR38","doi-asserted-by":"crossref","unstructured":"Moukari M, Picard S, Simoni L, Jurie F (2018) Deep multi-scale architectures for monocular depth estimation. In: 2018 25th IEEE international conference on image processing (ICIP). IEEE, pp 2940\u20132944","DOI":"10.1109\/ICIP.2018.8451408"},{"key":"4702_CR39","unstructured":"Blundell C, Cornebise J, Kavukcuoglu K, Wierstra D (2015) Weight uncertainty in neural networks. arXiv:1505.05424"},{"key":"4702_CR40","unstructured":"Gal Y, Ghahramani Z (2016) Dropout as a Bayesian approximation: representing model uncertainty in deep learning. In: International conference on machine learning, pp 1050\u20131059"},{"key":"4702_CR41","doi-asserted-by":"crossref","unstructured":"Engel J, Sch\u00f6ps T, Cremers D (2014) Lsd-slam: large-scale direct monocular slam. In: European conference on computer vision. Springer, pp 834\u2013849","DOI":"10.1007\/978-3-319-10605-2_54"},{"key":"4702_CR42","doi-asserted-by":"crossref","unstructured":"Yang N, Wang R, St\u00fcckler J, Cremers D (2018) Deep virtual stereo odometry: leveraging deep depth prediction for monocular direct sparse odometry. In: European conference on computer vision. Springer, pp 835\u2013852","DOI":"10.1007\/978-3-030-01237-3_50"},{"key":"4702_CR43","unstructured":"Yu F, Koltun V (2016) Multi-scale context aggregation by dilated convolutions. In: ICLR"},{"key":"4702_CR44","unstructured":"Zwald L, Lambert-Lacroix S (2012) The berhu penalty and the grouped effect. arXiv:1207.6868"},{"key":"4702_CR45","doi-asserted-by":"crossref","unstructured":"Xu D, Wang W, Tang H, Liu H, Sebe N, Ricci E (2018) Structured attention guided convolutional neural fields for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3917\u20133925","DOI":"10.1109\/CVPR.2018.00412"},{"key":"4702_CR46","doi-asserted-by":"crossref","unstructured":"Xu D, Ricci E, Ouyang W, Wang X, Sebe N (2017) Multi-scale continuous crfs as sequential deep networks for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), vol\u00a01","DOI":"10.1109\/CVPR.2017.25"},{"key":"4702_CR47","doi-asserted-by":"crossref","unstructured":"Nathan\u00a0Silberman PK, Hoiem D, Fergus R (2012) Indoor segmentation and support inference from rgbd images. In: ECCV","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"4702_CR48","doi-asserted-by":"crossref","unstructured":"Sturm J, Engelhard N, Endres F, Burgard W, Cremers D (2012) A benchmark for the evaluation of rgb-d slam systems. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems (IROS). IEEE, pp 573\u2013580","DOI":"10.1109\/IROS.2012.6385773"},{"key":"4702_CR49","doi-asserted-by":"crossref","unstructured":"Levin A, Lischinski D, Weiss Y (2004) Colorization using optimization. In: ACM transactions on graphics (tog), vol\u00a023, no\u00a03. ACM, pp 689\u2013694","DOI":"10.1145\/1015706.1015780"},{"issue":"3","key":"4702_CR50","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M, Berg AC, Fei-Fei L (2015) ImageNet large scale visual recognition challenge. Int J Comput Vis (IJCV) 115(3):211\u2013252","journal-title":"Int J Comput Vis (IJCV)"},{"key":"4702_CR51","doi-asserted-by":"crossref","unstructured":"Pizzoli M, Forster C, Scaramuzza D (2014) Remode: probabilistic, monocular dense reconstruction in real time. In: 2014 IEEE international conference on robotics and automation (ICRA). IEEE, pp 2609\u20132616","DOI":"10.1109\/ICRA.2014.6907233"},{"key":"4702_CR52","doi-asserted-by":"crossref","unstructured":"Concha\u00a0Belenguer A, Civera\u00a0Sancho J (2015) Dpptam: dense piecewise planar tracking and mapping from a monocular sequence. In: Proceedings of IEEE\/RSJ international conference on intelligent robotic systems, no. ART-2015-92153","DOI":"10.1109\/IROS.2015.7354184"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-04702-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-020-04702-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-04702-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T08:24:04Z","timestamp":1749803044000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-020-04702-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,2,3]]},"references-count":52,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2020,8]]}},"alternative-id":["4702"],"URL":"https:\/\/doi.org\/10.1007\/s00521-020-04702-3","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,2,3]]},"assertion":[{"value":"21 June 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}