{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T13:29:47Z","timestamp":1767706187493},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"17","license":[{"start":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T00:00:00Z","timestamp":1722988800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T00:00:00Z","timestamp":1722988800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2024,11]]},"DOI":"10.1007\/s11227-024-06388-z","type":"journal-article","created":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T05:03:14Z","timestamp":1723006994000},"page":"25099-25117","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Tl-depth: monocular depth estimation based on tower connections and Laplacian-filtering residual completion"],"prefix":"10.1007","volume":"80","author":[{"given":"Qi","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuqin","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hui","family":"Lou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,7]]},"reference":[{"key":"6388_CR1","doi-asserted-by":"crossref","unstructured":"Saxena A, Sun M, Ng A-Y (2007) Learning 3-D scene structure from a single still image. In: 2007 IEEE 11th International Conference on Computer Vision, pp 1-8. IEEE","DOI":"10.1109\/ICCV.2007.4408828"},{"issue":"5","key":"6388_CR2","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1167\/17.5.22","volume":"17","author":"C-C Su","year":"2017","unstructured":"Su C-C, Cormack L-K, Bovik A-C (2017) Bayesian depth estimation from monocular natural images. J Vis 17(5):22","journal-title":"J Vis"},{"key":"6388_CR3","doi-asserted-by":"crossref","unstructured":"Koch T, Liebel L, Fraundorfer F, Korner M (2018) Evaluation of CNN-based single-image depth estimation methods. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops, pp 0-0","DOI":"10.1007\/978-3-030-11015-4_25"},{"key":"6388_CR4","doi-asserted-by":"crossref","unstructured":"Strecha C, Von Hansen W, Van Gool L, Fua P et al (2008) On benchmarking camera calibration and multi-view stereo for high resolution imagery. In: 2008 IEEE Conference on Computer Vision and Pattern Recognition, pp 1-8. IEEE","DOI":"10.1109\/CVPR.2008.4587706"},{"key":"6388_CR5","doi-asserted-by":"crossref","unstructured":"Chang A, Dai A, Funkhouser T, Halber M et al (2017) Matterport3d: learning from RGB-D data in indoor environments. arXiv preprint arXiv:1709.06158","DOI":"10.1109\/3DV.2017.00081"},{"key":"6388_CR6","doi-asserted-by":"crossref","unstructured":"Dai A, Chang A-X, Savva M, Halber M et al (2017) Richly-annotated 3D reconstructions of indoor scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 5828-5839","DOI":"10.1109\/CVPR.2017.261"},{"key":"6388_CR7","unstructured":"Armeni I, Sax S, Zamir A-R, Savarese S (2017) Joint 2D-3D-semantic data for indoor scene understanding. arXiv preprint arXiv:1702.01105"},{"key":"6388_CR8","doi-asserted-by":"crossref","unstructured":"Godard C, Mac Aodha O, Brostow G-J (2017) Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 270-279","DOI":"10.1109\/CVPR.2017.699"},{"key":"6388_CR9","doi-asserted-by":"crossref","unstructured":"Garg R, Bg V-K, Carneiro G, Reid I (2016) Unsupervised CNN for single view depth estimation: geometry to the rescue. In: Computer Vision-ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part VIII 14, pp 740-756. Springer International Publishing","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"6388_CR10","doi-asserted-by":"crossref","unstructured":"Shim D, Kim H-J (2023) SwinDepth: unsupervised depth estimation using monocular sequences via swin transformer and densely cascaded network. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp 4983-4990. IEEE","DOI":"10.1109\/ICRA48891.2023.10160657"},{"key":"6388_CR11","doi-asserted-by":"crossref","unstructured":"Liu L, Song X, Wang M, Liu Y et al (2021) Self-supervised monocular depth estimation for all day images using domain separation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 12737-12746","DOI":"10.1109\/ICCV48922.2021.01250"},{"key":"6388_CR12","doi-asserted-by":"crossref","unstructured":"Godard C, Mac Aodha O, Firman M, Brostow G-J (2019) Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 3828-3838","DOI":"10.1109\/ICCV.2019.00393"},{"issue":"6","key":"6388_CR13","doi-asserted-by":"publisher","first-page":"1309","DOI":"10.1109\/TRO.2016.2624754","volume":"32","author":"C Cadena","year":"2016","unstructured":"Cadena C, Carlone L, Carrillo H, Latif Y et al (2016) Past, present, and future of simultaneous localization and mapping: toward the robust-perception age. IEEE Trans Robot 32(6):1309\u20131332","journal-title":"IEEE Trans Robot"},{"key":"6388_CR14","doi-asserted-by":"crossref","unstructured":"Yu C, Liu Z, Liu X-J, Xie F et al (2018) DS-SLAM: a semantic visual SLAM towards dynamic environments. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp 1168-1174. IEEE","DOI":"10.1109\/IROS.2018.8593691"},{"key":"6388_CR15","doi-asserted-by":"crossref","unstructured":"Guizilini V, Ambrus R, Pillai S, Raventos A, Gaidon A (2020) 3D packing for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2485-2494","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"6388_CR16","doi-asserted-by":"crossref","unstructured":"Johnston A, Carneiro G (2020) Self-supervised monocular trained depth estimation using self-attention and discrete disparity volume. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 4756-4765","DOI":"10.1109\/CVPR42600.2020.00481"},{"key":"6388_CR17","first-page":"2366","volume":"27","author":"D Eigen","year":"2014","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. Adv Neural Inf Process Syst 27:2366","journal-title":"Adv Neural Inf Process Syst"},{"key":"6388_CR18","doi-asserted-by":"crossref","unstructured":"Eigen D, Fergus R (2015) Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. In: Proceedings of the IEEE International Conference on Computer Vision, pp 2650-2658","DOI":"10.1109\/ICCV.2015.304"},{"key":"6388_CR19","doi-asserted-by":"crossref","unstructured":"Liu F, Shen C, Lin G (2015) Deep convolutional neural fields for depth estimation from a single image. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 5162-5170","DOI":"10.1109\/CVPR.2015.7299152"},{"key":"6388_CR20","unstructured":"Li B, Shen C, Dai Y, Van Den Hengel A et al (2015) Depth and surface normal estimation from monocular images using regression on deep features and hierarchical CRFS. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 1119-1127"},{"issue":"11","key":"6388_CR21","doi-asserted-by":"publisher","first-page":"3174","DOI":"10.1109\/TCSVT.2017.2740321","volume":"28","author":"Y Cao","year":"2017","unstructured":"Cao Y, Wu Z, Shen C (2017) Estimating depth from monocular images as classification using deep fully convolutional residual networks. IEEE Trans Circuits Syst Video Technol 28(11):3174\u20133182","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"6388_CR22","doi-asserted-by":"crossref","unstructured":"Zhou T, Brown M, Snavely N, Lowe D-G (2017) Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 1851-1858","DOI":"10.1109\/CVPR.2017.700"},{"key":"6388_CR23","doi-asserted-by":"crossref","unstructured":"Mahjourian R, Wicke M, Angelova A (2018) Unsupervised learning of depth and ego-motion from monocular video using 3d geometric constraints. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 5667-5675","DOI":"10.1109\/CVPR.2018.00594"},{"key":"6388_CR24","unstructured":"Vijayanarasimhan S, Ricco S, Schmid C, Sukthankar R et al (2017) Sfm-net: learning of structure and motion from video. arXiv preprint arXiv:1704.07804"},{"key":"6388_CR25","first-page":"256","volume":"32","author":"J Bian","year":"2019","unstructured":"Bian J, Li Z, Wang N, Zhan H et al (2019) Unsupervised scale-consistent depth and ego-motion learning from monocular video. Adv Neural inf Process Syst 32:256","journal-title":"Adv Neural inf Process Syst"},{"key":"6388_CR26","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D et al (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"issue":"6","key":"6388_CR27","doi-asserted-by":"publisher","first-page":"837","DOI":"10.1007\/s11633-023-1458-0","volume":"20","author":"Z Li","year":"2023","unstructured":"Li Z, Chen Z, Liu X, Jiang J (2023) Depthformer: exploiting long-range correlation and local information for accurate monocular depth estimation. Mach Intell Res 20(6):837\u2013854","journal-title":"Mach Intell Res"},{"key":"6388_CR28","doi-asserted-by":"crossref","unstructured":"Ranftl R, Bochkovskiy A, Koltun V (2021) Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 12179-12188","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"6388_CR29","doi-asserted-by":"crossref","unstructured":"Yang G, Tang H, Ding M, Sebe N et al (2021) Transformer-based attention networks for continuous pixel-wise prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer vision, pp 16269-16279","DOI":"10.1109\/ICCV48922.2021.01596"},{"key":"6388_CR30","doi-asserted-by":"crossref","unstructured":"Zhao C, Zhang Y, Poggi M, Tosi F et al (2022) Monovit: self-supervised monocular depth estimation with a vision transformer. In: 2022 International Conference on 3D Vision (3DV), pp 668-678. IEEE","DOI":"10.1109\/3DV57658.2022.00077"},{"issue":"23","key":"6388_CR31","doi-asserted-by":"publisher","first-page":"26912","DOI":"10.1109\/JSEN.2021.3120753","volume":"21","author":"Z Cheng","year":"2021","unstructured":"Cheng Z, Zhang Y, Tang C (2021) Swin-depth: using transformers and multi-scale fusion for monocular-based depth estimation. IEEE Sens J 21(23):26912\u201326920","journal-title":"IEEE Sens J"},{"key":"6388_CR32","doi-asserted-by":"crossref","unstructured":"Zhang H, Wu C, Zhang Z, Zhu Y et al (2022) Resnest: split-attention networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2736-2746","DOI":"10.1109\/CVPRW56347.2022.00309"},{"key":"6388_CR33","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee J-Y, Kweon I-S (2018) Cbam: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 3-19","DOI":"10.1007\/978-3-030-01234-2_1"},{"issue":"11","key":"6388_CR34","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger A, Lenz P, Stiller C, Urtasun R (2013) Vision meets robotics: the kitti dataset. Int J Robot Res 32(11):1231\u20131237","journal-title":"Int J Robot Res"},{"key":"6388_CR35","unstructured":"Paszke A, Gross S, Chintala S, Chanan G et al (2017) Automatic differentiation in pytorch"},{"key":"6388_CR36","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1016\/j.patrec.2017.03.023","volume":"94","author":"M Sharma","year":"2017","unstructured":"Sharma M, Pachori R, Rajendra A (2017) Adam: a method for stochastic optimization. Pattern Recogn Lett 94:172\u2013179","journal-title":"Pattern Recogn Lett"},{"key":"6388_CR37","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1109\/TPAMI.2023.3322549","volume":"46","author":"L Sun","year":"2023","unstructured":"Sun L, Bian J-W, Zhan H, Yin W et al (2023) Sc-depthv3: robust self-supervised monocular depth estimation for dynamic scenes. IEEE Trans Pattern Anal Mach Intell 46:497","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6388_CR38","doi-asserted-by":"crossref","unstructured":"Klingner M, Term\u00f6hlen J-A, Mikolajczyk J, Fingscheidt T (2020) Self-supervised monocular depth estimation: solving the dynamic object problem by semantic guidance. In: Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part XX 16, pp 582-600. Springer International Publishing","DOI":"10.1007\/978-3-030-58565-5_35"},{"key":"6388_CR39","doi-asserted-by":"crossref","unstructured":"Yan J, Zhao H, Bu P, Jin Y (2021) Channel-wise attention-based network for self-supervised monocular depth estimation. In: 2021 International Conference on 3D vision (3DV), pp 464-473. IEEE","DOI":"10.1109\/3DV53792.2021.00056"},{"key":"6388_CR40","doi-asserted-by":"crossref","unstructured":"Zhou Z, Fan X, Shi P, Xin Y (2021) R-MSFM: recurrent multi-scale feature modulation for monocular depth estimating. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 12777-12786","DOI":"10.1109\/ICCV48922.2021.01254"},{"key":"6388_CR41","doi-asserted-by":"crossref","unstructured":"Zhang N, Nex F, Vosselman G, Kerle N (2023) Lite-mono: a lightweight cnn and transformer architecture for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 18537-18546","DOI":"10.1109\/CVPR52729.2023.01778"},{"key":"6388_CR42","doi-asserted-by":"crossref","unstructured":"Guizilini V, Ambrus R, Pillai S, Gaidon A (2020) Packnet-SFM: 3D packing for self-supervised monocular depth estimation. arxiv preprint arXiv:1905.02693","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"6388_CR43","first-page":"333","volume":"46","author":"J Bae","year":"2023","unstructured":"Bae J, Hwang K, Im S (2023) A study on the generality of neural network structures for monocular depth estimation. IEEE Trans Pattern Anal Mach Intell 46:333","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"6388_CR44","first-page":"54987","volume":"36","author":"Y Sun","year":"2024","unstructured":"Sun Y, Hariharan B (2024) Dynamo-depth: fixing unsupervised depth estimation for dynamical scenes. Adv Neural Inf Process Syst 36:54987","journal-title":"Adv Neural Inf Process Syst"},{"issue":"13","key":"6388_CR45","doi-asserted-by":"publisher","first-page":"4090","DOI":"10.3390\/s24134090","volume":"24","author":"H Hu","year":"2024","unstructured":"Hu H, Feng Y, Li D, Zhang S (2024) Monocular depth estimation via self-supervised self-distillation. Sensors 24(13):4090","journal-title":"Sensors"},{"key":"6388_CR46","doi-asserted-by":"crossref","unstructured":"Zhao C, Zhang Y, Poggi M, Tosi F (2022) Monovit: self-supervised monocular depth estimation with a vision transformer. In: 2022 International Conference on 3D Vision (3DV), pp 668-678. IEEE","DOI":"10.1109\/3DV57658.2022.00077"},{"key":"6388_CR47","doi-asserted-by":"crossref","unstructured":"Wang C, Buenaposada J-M, Zhu R, Lucey S (2018) Learning depth from monocular videos using direct methods. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 2022-2030","DOI":"10.1109\/CVPR.2018.00216"},{"issue":"11","key":"6388_CR48","doi-asserted-by":"publisher","first-page":"4381","DOI":"10.1109\/TCSVT.2021.3049869","volume":"31","author":"M Song","year":"2021","unstructured":"Song M, Lim S, Kim W (2021) Monocular depth estimation using laplacian pyramid-based depth residuals. IEEE Trans Circuits Syst Video Technol 31(11):4381\u20134393","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"5","key":"6388_CR49","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2008.132","volume":"31","author":"A Saxena","year":"2008","unstructured":"Saxena A, Sun M, Ng A-Y (2008) Make3d: learning 3d scene structure from a single still image. IEEE Trans Pattern Anal Mach Intell 31(5):824\u2013840","journal-title":"IEEE Trans Pattern Anal Mach Intell"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-024-06388-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-024-06388-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-024-06388-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T15:31:02Z","timestamp":1725550262000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-024-06388-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,7]]},"references-count":49,"journal-issue":{"issue":"17","published-print":{"date-parts":[[2024,11]]}},"alternative-id":["6388"],"URL":"https:\/\/doi.org\/10.1007\/s11227-024-06388-z","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2024,8,7]]},"assertion":[{"value":"29 July 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 August 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This declaration is not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"The authors declare that they have no competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}