{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T05:53:11Z","timestamp":1750830791603,"version":"3.37.3"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"24","license":[{"start":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:00:00Z","timestamp":1705017600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:00:00Z","timestamp":1705017600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100007162","name":"Department of science and technology of Guangdong Province","doi-asserted-by":"crossref","award":["2021B01420003"],"award-info":[{"award-number":["2021B01420003"]}],"id":[{"id":"10.13039\/501100007162","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-17976-1","type":"journal-article","created":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T06:01:51Z","timestamp":1705039311000},"page":"65579-65601","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Dual-attention-based semantic-aware self-supervised monocular depth estimation"],"prefix":"10.1007","volume":"83","author":[{"given":"Jinze","family":"Xu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3238-5975","authenticated-orcid":false,"given":"Feng","family":"Ye","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yizong","family":"Lai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,1,12]]},"reference":[{"key":"17976_CR1","doi-asserted-by":"crossref","unstructured":"Klingner M, Term\u00f6hlen JA, Mikolajczyk J et\u00a0al (2020) Self-supervised monocular depth estimation: Solving the dynamic object problem by semantic guidance. In: Computer vision\u2013ECCV 2020: 16th European conference, Springer, pp 582\u2013600","DOI":"10.1007\/978-3-030-58565-5_35"},{"key":"17976_CR2","doi-asserted-by":"crossref","unstructured":"Guizilini V, Ambrus R, Pillai S et\u00a0al (2020) 3d packing for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2485\u20132494","DOI":"10.1109\/CVPR42600.2020.00256"},{"issue":"21","key":"17976_CR3","doi-asserted-by":"publisher","first-page":"5491","DOI":"10.3390\/rs14215491","volume":"14","author":"C Tang","year":"2022","unstructured":"Tang C, Wang Y, Zhang L et al (2022) Multisource fusion uav cluster cooperative positioning using information geometry. Remote Sensing 14(21):5491","journal-title":"Remote Sensing"},{"issue":"23","key":"17976_CR4","doi-asserted-by":"publisher","first-page":"6094","DOI":"10.3390\/rs14236094","volume":"14","author":"C Tang","year":"2022","unstructured":"Tang C, Wang C, Zhang L et al (2022) Multivehicle 3d cooperative positioning algorithm based on information geometric probability fusion of gnss\/wireless station navigation. Remote Sensing 14(23):6094","journal-title":"Remote Sensing"},{"key":"17976_CR5","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. In: Advances in neural information processing systems"},{"key":"17976_CR6","doi-asserted-by":"crossref","unstructured":"Fu H, Gong M, Wang C et\u00a0al (2018) Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2002\u20132011","DOI":"10.1109\/CVPR.2018.00214"},{"key":"17976_CR7","doi-asserted-by":"publisher","unstructured":"Farooq\u00a0Bhat S, Alhashim I, Wonka P (2021) Adabins: depth estimation using adaptive bins. In: 2021 IEEE\/CVF Conference on computer vision and pattern recognition (CVPR), pp 4008\u20134017. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00400","DOI":"10.1109\/CVPR46437.2021.00400"},{"key":"17976_CR8","doi-asserted-by":"crossref","unstructured":"Xie J, Girshick R, Farhadi A (2016) Deep3d: fully automatic 2d-to-3d video conversion with deep convolutional neural networks. In: Computer Vision\u2013ECCV 2016: 14th European conference, pp 842\u2013857","DOI":"10.1007\/978-3-319-46493-0_51"},{"key":"17976_CR9","doi-asserted-by":"crossref","unstructured":"Garg R, B.G. VK, Carneiro G et\u00a0al (2016) Unsupervised cnn for single view depth estimation: Geometry to the rescue. In: Computer Vision \u2013 ECCV 2016, Cham, pp 740\u2013756","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"17976_CR10","doi-asserted-by":"crossref","unstructured":"Zhou T, Brown M, Snavely N et\u00a0al (2017) Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1851\u20131858","DOI":"10.1109\/CVPR.2017.700"},{"key":"17976_CR11","doi-asserted-by":"crossref","unstructured":"Godard C, Mac\u00a0Aodha O, Firman M et\u00a0al (2019) Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3828\u20133838","DOI":"10.1109\/ICCV.2019.00393"},{"key":"17976_CR12","doi-asserted-by":"crossref","unstructured":"Shu C, Yu K, Duan Z et\u00a0al (2020) Feature-metric loss for self-supervised learning of depth and egomotion. In: Computer vision\u2013ECCV 2020: 16th European conference, pp 572\u2013588","DOI":"10.1007\/978-3-030-58529-7_34"},{"key":"17976_CR13","doi-asserted-by":"crossref","unstructured":"Guizilini V, Hou R, Li J et\u00a0al (2020) Semantically-guided representation learning for self-supervised monocular depth. arXiv:2002.12319","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"17976_CR14","unstructured":"Choi J, Jung D, Lee D et\u00a0al (2020) Safenet: Self-supervised monocular depth estimation with semantic-aware feature extraction. arXiv:2010.02893"},{"key":"17976_CR15","doi-asserted-by":"crossref","unstructured":"Jung H, Park E, Yoo S (2021) Fine-grained semantics-aware representation enhancement for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12,642\u201312,652","DOI":"10.1109\/ICCV48922.2021.01241"},{"key":"17976_CR16","doi-asserted-by":"crossref","unstructured":"Zama\u00a0Ramirez P, Poggi M, Tosi F et\u00a0al (2019) Geometry meets semantics for semi-supervised monocular depth estimation. In: Computer vision\u2013ACCV 2018: 14th asian conference on computer vision, Springer, pp 298\u2013313","DOI":"10.1007\/978-3-030-20893-6_19"},{"key":"17976_CR17","doi-asserted-by":"crossref","unstructured":"Zhu S, Brazil G, Liu X (2020) The edge of depth: explicit constraints between segmentation and depth. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13,116\u201313,125","DOI":"10.1109\/CVPR42600.2020.01313"},{"key":"17976_CR18","doi-asserted-by":"crossref","unstructured":"Li R, Xue D, Su S, et\u00a0al. (2023) Learning depth via leveraging semantics: self-supervised monocular depth estimation with both implicit and explicit semantic guidance. Pattern Recognition p 109297","DOI":"10.1016\/j.patcog.2022.109297"},{"key":"17976_CR19","unstructured":"Cai H, Matai J, Borse S et\u00a0al (2021) X-distill: improving self-supervised monocular depth via cross-task distillation. arXiv:2110.12516"},{"key":"17976_CR20","doi-asserted-by":"crossref","unstructured":"Peng R, Wang R, Lai Y et\u00a0al (2021) Excavating the potential capacity of self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 15,560\u201315,569","DOI":"10.1109\/ICCV48922.2021.01527"},{"key":"17976_CR21","doi-asserted-by":"crossref","unstructured":"Godard C, Mac\u00a0Aodha O, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 270\u2013279","DOI":"10.1109\/CVPR.2017.699"},{"key":"17976_CR22","doi-asserted-by":"crossref","unstructured":"Poggi M, Tosi F, Mattoccia S (2018) Learning monocular depth estimation with unsupervised trinocular assumptions. In: 2018 International conference on 3d vision (3DV), IEEE, pp 324\u2013333","DOI":"10.1109\/3DV.2018.00045"},{"key":"17976_CR23","unstructured":"GonzalezBello JL, Kim M (2020) Forget about the lidar: self-supervised depth estimators with med probability volumes. In: Advances in neural information processing systems, pp 12,626\u201312,637"},{"key":"17976_CR24","doi-asserted-by":"crossref","unstructured":"Watson J, Firman M, Brostow GJ et\u00a0al (2019) Self-supervised monocular depth hints. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2162\u20132171","DOI":"10.1109\/ICCV.2019.00225"},{"issue":"2","key":"17976_CR25","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1109\/TPAMI.2007.1166","volume":"30","author":"H Hirschmuller","year":"2007","unstructured":"Hirschmuller H (2007) Stereo processing by semiglobal matching and mutual information. IEEE Trans Pattern Anal Mach Intell 30(2):328\u2013341","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17976_CR26","doi-asserted-by":"crossref","unstructured":"Poggi M, Aleotti F, Tosi F et\u00a0al (2020) On the uncertainty of self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3227\u20133237","DOI":"10.1109\/CVPR42600.2020.00329"},{"key":"17976_CR27","doi-asserted-by":"crossref","unstructured":"Yang N, Stumberg Lv, Wang R et\u00a0al (2020) D3vo: deep depth, deep pose and deep uncertainty for monocular visual odometry. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1281\u20131292","DOI":"10.1109\/CVPR42600.2020.00136"},{"key":"17976_CR28","doi-asserted-by":"crossref","unstructured":"Ranjan A, Jampani V, Balles L et\u00a0al (2019) Competitive collaboration: joint unsupervised learning of depth, camera motion, optical flow and motion segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12,240\u201312,249","DOI":"10.1109\/CVPR.2019.01252"},{"issue":"2","key":"17976_CR29","doi-asserted-by":"publisher","first-page":"3491","DOI":"10.1109\/LRA.2022.3145057","volume":"7","author":"V Guizilini","year":"2022","unstructured":"Guizilini V, Lee KH, Ambru\u015f R et al (2022) Learning optical flow, depth, and scene flow without real-world labels. IEEE Robotics Automation Lett 7(2):3491\u20133498","journal-title":"IEEE Robotics Automation Lett"},{"key":"17976_CR30","doi-asserted-by":"crossref","unstructured":"Yin Z, Shi J (2018) Geonet: unsupervised learning of dense depth, optical flow and camera pose. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1983\u20131992","DOI":"10.1109\/CVPR.2018.00212"},{"key":"17976_CR31","doi-asserted-by":"crossref","unstructured":"Xiang J, Wang Y, An L, et\u00a0al. (2022) Visual attention-based self-supervised absolute depth estimation using geometric priors in autonomous driving. IEEE Robotics and Automation Letters 7(4):11,998\u201312,005","DOI":"10.1109\/LRA.2022.3210298"},{"key":"17976_CR32","doi-asserted-by":"crossref","unstructured":"Petrovai A, Nedevschi S (2022) Exploiting pseudo labels in a self-supervised learning framework for improved monocular depth estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1578\u20131588","DOI":"10.1109\/CVPR52688.2022.00163"},{"key":"17976_CR33","doi-asserted-by":"crossref","unstructured":"Yan J, Zhao H, Bu P et\u00a0al (2021) Channel-wise attention-based network for self-supervised monocular depth estimation. In: 2021 International conference on 3D vision (3DV), IEEE, pp 464\u2013473","DOI":"10.1109\/3DV53792.2021.00056"},{"key":"17976_CR34","doi-asserted-by":"crossref","unstructured":"Lyu X, Liu L, Wang M et\u00a0al (2021) Hr-depth: high resolution self-supervised monocular depth estimation. In: Proceedings of the AAAI conference on artificial intelligence, pp 2294\u20132301","DOI":"10.1609\/aaai.v35i3.16329"},{"key":"17976_CR35","unstructured":"Zhou H, Greenwood D, Taylor S (2021a) Self-supervised monocular depth estimation with internal feature fusion. arXiv:2110.09482"},{"key":"17976_CR36","doi-asserted-by":"crossref","unstructured":"Zhou Z, Fan X, Shi P et\u00a0al (2021b) R-msfm: recurrent multi-scale feature modulation for monocular depth estimating. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12,777\u201312,786","DOI":"10.1109\/ICCV48922.2021.01254"},{"key":"17976_CR37","doi-asserted-by":"crossref","unstructured":"Han W, Yin J, Jin X et\u00a0al (2022) Brnet: exploring comprehensive features for monocular depth estimation. In: Computer Vision\u2013ECCV 2022: 17th European conference, Springer, pp 586\u2013602","DOI":"10.1007\/978-3-031-19839-7_34"},{"key":"17976_CR38","doi-asserted-by":"crossref","unstructured":"Zhao C, Zhang Y, Poggi M et\u00a0al (2022) Monovit: self-supervised monocular depth estimation with a vision transformer. arXiv:2208.03543","DOI":"10.1109\/3DV57658.2022.00077"},{"key":"17976_CR39","doi-asserted-by":"crossref","unstructured":"Lee S, Im S, Lin S et\u00a0al (2021) Learning monocular depth in dynamic scenes via instance-aware projection consistency. In: Proceedings of the AAAI conference on artificial intelligence, pp 1863\u20131872","DOI":"10.1609\/aaai.v35i3.16281"},{"key":"17976_CR40","unstructured":"He C, Li K, Zhang Y et\u00a0al (2023a) Weakly-supervised concealed object segmentation with sam-based pseudo labeling and multi-scale feature grouping. arXiv:2305.11003"},{"key":"17976_CR41","doi-asserted-by":"crossref","unstructured":"He C, Li K, Zhang Y et\u00a0al (2023b) Camouflaged object detection with feature decomposition and edge reconstruction. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 22,046\u201322,055","DOI":"10.1109\/CVPR52729.2023.02111"},{"key":"17976_CR42","unstructured":"He C, Li K, Zhang Y et\u00a0al (2023c) Strategic preys make acute predators: Enhancing camouflaged object detectors by generating camouflaged objects. arXiv:2308.03166"},{"key":"17976_CR43","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S et\u00a0al (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"issue":"4","key":"17976_CR44","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR et al (2004) Image quality assessment: from error visibility to structural similarity. IEEE Trans Image Process 13(4):600\u2013612","journal-title":"IEEE Trans Image Process"},{"key":"17976_CR45","doi-asserted-by":"crossref","unstructured":"Zhu Y, Sapra K, Reda FA et\u00a0al (2019) Improving semantic segmentation via video propagation and label relaxation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 8856\u20138865","DOI":"10.1109\/CVPR.2019.00906"},{"key":"17976_CR46","doi-asserted-by":"crossref","unstructured":"Bolya D, Fu CY, Dai X et\u00a0al (2023) Hydra attention: efficient attention with many heads. In: Computer vision\u2013ECCV 2022 Workshops, Springer, pp 35\u201349","DOI":"10.1007\/978-3-031-25082-8_3"},{"key":"17976_CR47","unstructured":"Vaswani A, Shazeer N, Parmar N et\u00a0al (2017) Attention is all you need. In: Advances in neural information processing systems"},{"key":"17976_CR48","unstructured":"Katharopoulos A, Vyas A, Pappas N et\u00a0al (2020) Transformers are rnns: fast autoregressive transformers with linear attention. In: International conference on machine learning, pp 5156\u20135165"},{"key":"17976_CR49","doi-asserted-by":"crossref","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? the kitti vision benchmark suite. In: 2012 IEEE conference on computer vision and pattern recognition, IEEE, pp 3354\u20133361","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"17976_CR50","doi-asserted-by":"crossref","unstructured":"Silberman N, Hoiem D, Kohli P et\u00a0al (2012) Indoor segmentation and support inference from rgbd images. In: Computer Vision\u2013ECCV 2012: 12th European conference on computer vision, Florence, Italy, October 7-13, 2012, Proceedings, Part V 12, Springer, pp 746\u2013760","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"17976_CR51","doi-asserted-by":"crossref","unstructured":"Menze M, Geiger A (2015) Object scene flow for autonomous vehicles. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3061\u20133070","DOI":"10.1109\/CVPR.2015.7298925"},{"issue":"6","key":"17976_CR52","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390","journal-title":"Commun ACM"},{"key":"17976_CR53","unstructured":"Paszke A, Gross S, Chintala S et\u00a0al (2017) Automatic differentiation in pytorch. In: International conference on learning representations (ICLR)"},{"key":"17976_CR54","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. In: International conference on learning representations (ICLR)"},{"key":"17976_CR55","unstructured":"Bian J, Li Z, Wang N et\u00a0al (2019) Unsupervised scale-consistent depth and ego-motion learning from monocular video. In: Advances in neural information processing systems"},{"key":"17976_CR56","doi-asserted-by":"crossref","unstructured":"Wang L, Wang Y, Wang L et\u00a0al (2021) Can scale-consistent monocular depth be learned in a self-supervised scale-invariant manner? In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12,727\u201312,736","DOI":"10.1109\/ICCV48922.2021.01249"},{"key":"17976_CR57","doi-asserted-by":"crossref","unstructured":"Dijk Tv, Croon Gd (2019) How do neural networks see depth in single images? In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 2183\u20132191","DOI":"10.1109\/ICCV.2019.00227"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17976-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-17976-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17976-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T22:59:04Z","timestamp":1731020344000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-17976-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,12]]},"references-count":57,"journal-issue":{"issue":"24","published-online":{"date-parts":[[2024,7]]}},"alternative-id":["17976"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-17976-1","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2024,1,12]]},"assertion":[{"value":"8 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 December 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 December 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they do not have any conflicts of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}