{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T06:35:47Z","timestamp":1773470147060,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T00:00:00Z","timestamp":1732752000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T00:00:00Z","timestamp":1732752000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s00138-024-01640-1","type":"journal-article","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T07:52:46Z","timestamp":1732780366000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Self-supervised monocular depth estimation via joint attention and intelligent mask loss"],"prefix":"10.1007","volume":"36","author":[{"given":"Peng","family":"Guo","sequence":"first","affiliation":[]},{"given":"Shuguo","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Wang","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Kourosh","family":"Khoshelham","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,28]]},"reference":[{"key":"1640_CR1","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1007\/s00034-019-01173-3","volume":"39","author":"M Geng","year":"2020","unstructured":"Geng, M., Shang, S., Ding, B., Wang, H., Zhang, P.: Unsupervised learning-based depth estimation-aided visual SLAM Approach. Circ. Syst. Signal. Pr. 39, 543\u2013570 (2020). https:\/\/doi.org\/10.1007\/s00034-019-01173-3","journal-title":"Circ. Syst. Signal. Pr"},{"key":"1640_CR2","doi-asserted-by":"publisher","unstructured":"Chen, C., Seff, A., Kornhauser, A., Xiao, J.: DeepDriving: Learning affordance for direct perception in autonomous driving. 2015 IEEE Int. Conf. Comput. Vis. (ICCV). 2722\u20132730 (2015). https:\/\/doi.org\/10.1109\/ICCV.2015.312","DOI":"10.1109\/ICCV.2015.312"},{"issue":"15","key":"1640_CR3","doi-asserted-by":"publisher","first-page":"11217","DOI":"10.1007\/s00521-020-04702-3","volume":"32","author":"Y Ding","year":"2020","unstructured":"Ding, Y., Lin, L., Wang, L., Zhang, M., Li, D.: Digging into the multi-scale structure for a more refined depth map and 3D reconstruction. Neural Comput. Appl. 32(15), 11217\u201311228 (2020). https:\/\/doi.org\/10.1007\/s00521-020-04702-3","journal-title":"Neural Comput. Appl."},{"issue":"24","key":"1640_CR4","doi-asserted-by":"publisher","first-page":"1375","DOI":"10.1049\/el.2018.6149","volume":"54","author":"B Wang","year":"2018","unstructured":"Wang, B., Feng, Y., Liu, H.: Multi-scale features fusion from sparse LiDAR data and single image for depth completion. Electron. Lett. 54(24), 1375\u20131376 (2018). https:\/\/doi.org\/10.1049\/el.2018.6149","journal-title":"Electron. Lett."},{"key":"1640_CR5","doi-asserted-by":"crossref","unstructured":"Willis, A.R., Papadakis, J., Brink, K.M.: Linear depth reconstruction RGBD Sens. SOUTHEASTCON 2017, (2017)","DOI":"10.1109\/SECON.2017.7925290"},{"key":"1640_CR6","doi-asserted-by":"publisher","first-page":"676","DOI":"10.1049\/iet-cvi.2018.5476","volume":"13","author":"Z Bao","year":"2019","unstructured":"Bao, Z., Li, B., Zhang, W.: Robustness of ToF and stereo fusion for high-accuracy depth map. IET Comput. Vis. 13, 676\u2013681 (2019). https:\/\/doi.org\/10.1049\/iet-cvi.2018.5476","journal-title":"IET Comput. Vis."},{"issue":"6","key":"1640_CR7","doi-asserted-by":"publisher","first-page":"1337","DOI":"10.1007\/s11704-018-8099-4","volume":"13","author":"G Wu","year":"2019","unstructured":"Wu, G., Li, Y., Huang, Y., Liu, Y.: Joint view synthesis and disparity refinement for stereo matching. Front. Comput. Sci. -Chi. 13(6), 1337\u20131352 (2019). https:\/\/doi.org\/10.1007\/s11704-018-8099-4","journal-title":"Front. Comput. Sci. -Chi"},{"key":"1640_CR8","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1016\/j.cviu.2014.02.014","volume":"123","author":"Y Wang","year":"2014","unstructured":"Wang, Y., Gao, Y., Achim, A., Dahnoun, N.: Robust obstacle detection based on a novel disparity calculation method and G-disparity. Comput. Vis. Image Und. 123, 23\u201340 (2014). https:\/\/doi.org\/10.1016\/j.cviu.2014.02.014","journal-title":"Comput. Vis. Image Und"},{"key":"1640_CR9","doi-asserted-by":"publisher","unstructured":"Wang, C., Miguel Buenaposada, J., Zhu, R., Lucey, S.: Learning Depth from Monocular Videos using Direct Methods, In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2022\u20132030, (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00216","DOI":"10.1109\/CVPR.2018.00216"},{"issue":"5","key":"1640_CR10","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2008.132","volume":"31","author":"A Saxena","year":"2009","unstructured":"Saxena, A., Sun, M., Ng, A.Y.: Make3D: Learning 3D scene structure from a single still image. IEEE T Pattern Anal. 31(5), 824\u2013840 (2009). https:\/\/doi.org\/10.1109\/TPAMI.2008.132","journal-title":"IEEE T Pattern Anal."},{"key":"1640_CR11","doi-asserted-by":"publisher","unstructured":"Laina, I., Rupprecht, C., Belagiannis, V., Tombari, F., Navab, N.: Deeper depth prediction with fully convolutional residual networks. Proc. 2016 FOURTH Int. Conf. 3D Vis. (3DV). pp 239\u2013248 (2016). https:\/\/doi.org\/10.1109\/3DV.2016.32","DOI":"10.1109\/3DV.2016.32"},{"key":"1640_CR12","doi-asserted-by":"publisher","unstructured":"Mayer, N., et al.: A Large dataset to train convolutional networks for disparity, optical flow, and scene flow estimation, In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4040\u20134048, 2016. (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.438","DOI":"10.1109\/CVPR.2016.438"},{"key":"1640_CR13","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2024.3353808","author":"Y Liu","year":"2024","unstructured":"Liu, Y., Zhang, J.: Service function chain embedding meets machine learning: deep reinforcement Learning Approach. IEEE TRABSACTIONS Netw. SERVICE Manage. (2024). https:\/\/doi.org\/10.1109\/TNSM.2024.3353808","journal-title":"IEEE TRABSACTIONS Netw. SERVICE Manage."},{"key":"1640_CR14","doi-asserted-by":"crossref","unstructured":"Moon, J., et al.: From-Ground-To-Objects: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. (2024)","DOI":"10.1109\/CVPR52733.2024.01001"},{"key":"1640_CR15","doi-asserted-by":"crossref","unstructured":"Han, W.: and Jianbing Shen. High-precision self-supervised monocular depth estimation with rich-resource prior. arXiv preprint arXiv:2408.00361 (2024)","DOI":"10.1007\/978-3-031-72751-1_9"},{"key":"1640_CR16","doi-asserted-by":"publisher","unstructured":"Luo, Y., et al.: Single View Stereo Matching, In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 155\u2013163, (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00024","DOI":"10.1109\/CVPR.2018.00024"},{"key":"1640_CR17","doi-asserted-by":"crossref","unstructured":"Poggi, M., Aleotti, F., Tosi, F., Mattoccia, S.: Towards real-time unsupervised monocular depth estimation on CPU, In: 2018 IEEE\/RSJ International conference on intelligent robots and systems (IROS), pp. 5848\u20135854, (2018)","DOI":"10.1109\/IROS.2018.8593814"},{"issue":"3","key":"1640_CR18","doi-asserted-by":"publisher","first-page":"752","DOI":"10.1007\/s11263-022-01718-1","volume":"131","author":"A Lopez-Rodriguez","year":"2023","unstructured":"Lopez-Rodriguez, A.: Desc: Domain adaptation for depth estimation via semantic consistency. Int. J. Comput. Vision. 131(3), 752\u2013771 (2023)","journal-title":"Int. J. Comput. Vision"},{"key":"1640_CR19","doi-asserted-by":"crossref","unstructured":"Casser, V., Pirk, S., Mahjourian, R., Angelova, A.: Depth Prediction without the Sensors: Leveraging Structure for Unsupervised Learning from Monocular Videos, In: Thirty-third AAAI Conference on artificial intelligence\/thirty-first innovative applications of artificial intelligence conference \/ ninth aaai symposium on educational advances in artificial intelligence, pp. 8001\u20138008, (2019)","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"1640_CR20","doi-asserted-by":"publisher","unstructured":"Mahjourian, R., Wicke, M., Angelova, A.: Unsupervised learning of depth and Ego-motion from Monocular Video using 3D geometric constraints. 2018 IEEE\/CVF Conf. Comput. Vis. PATTERN Recognit. (CVPR). 5667\u20135675 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00594","DOI":"10.1109\/CVPR.2018.00594"},{"key":"1640_CR21","doi-asserted-by":"publisher","unstructured":"Yang, Z., Wang, P., Wang, Y., Xu, W., Nevatia, R.: LEGO: Learning edge with geometry all at once by watching videos. In: 2018 IEEE\/CVF Conf. Comput. Vis. PATTERN Recognit. (CVPR). pp 225\u2013234 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00031","DOI":"10.1109\/CVPR.2018.00031"},{"key":"1640_CR22","doi-asserted-by":"publisher","unstructured":"Yin, Z., Shi, J.: GeoNet: Unsupervised learning of dense depth, Optical Flow and Camera Pose. In: 2018 IEEE\/CVF Conf. Comput. Vis. PATTERN Recognit. (CVPR). 1983\u20131992 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00212","DOI":"10.1109\/CVPR.2018.00212"},{"key":"1640_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, J., Huang, S., Liu, J., Zhu, X., Xu, F.: PYRF-PCR: A Robust three-stage 3D point Cloud Registration for Outdoor Scene. IEEE Trans. Intell. Veh. 9(1), 1270\u20131281 (Jan. 2024)","DOI":"10.1109\/TIV.2023.3327098"},{"key":"1640_CR24","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth Map Prediction from a Single Image using a Multi-Scale Deep Network. In: Advances in neural information processing systems 27 (NIPS 2014), vol. 27, (2014)"},{"key":"1640_CR25","doi-asserted-by":"crossref","unstructured":"Liu, F., Shen, C., Lin, G.: Deep Convolutional Neural Fields for Depth Estimation from a Single Image. In:  2015 IEEE conference on computer vision and pattern recognition (CVPR), pp. 5162\u20135170, (2015)","DOI":"10.1109\/CVPR.2015.7299152"},{"key":"1640_CR26","doi-asserted-by":"crossref","unstructured":"Liu, Y., Zhang, J.: Service function chain embedding meets machine learning: Deep reinforcement learning approach. IEEE Trans. Netw. Serv. Manage. (2024)","DOI":"10.1109\/TNSM.2024.3353808"},{"key":"1640_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Adaptive decomposition and Extraction Network of Individual Fingerprint Features for Specific Emitter Identification. IEEE Trans. Inf. Forensics Secur. (2024)","DOI":"10.1109\/TIFS.2024.3427361"},{"key":"1640_CR28","doi-asserted-by":"publisher","unstructured":"Godard, C., Aodha, O.M., Brostow, G.J.: Unsupervised Monocular Depth Estimation with Left-Right Consistency. In: 30TH IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017), pp. 6602\u20136611, (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.699","DOI":"10.1109\/CVPR.2017.699"},{"issue":"23","key":"1640_CR29","doi-asserted-by":"publisher","first-page":"26912","DOI":"10.1109\/JSEN.2021.3120753","volume":"21","author":"Z Cheng","year":"2021","unstructured":"Cheng, Z., Zhang, Y., Tang, C.: Swin-depth: Using transformers and multi-scale fusion for monocular-based depth estimation. IEEE Sens. J. 21(23), 26912\u201326920 (2021)","journal-title":"IEEE Sens. J."},{"key":"1640_CR30","doi-asserted-by":"publisher","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G.: Unsupervised learning of depth and Ego-motion from video. In: 30TH IEEE Conf. Comput. Vis. PATTERN Recognit. (CVPR 2017). 6612\u2013 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.700","DOI":"10.1109\/CVPR.2017.700"},{"key":"1640_CR31","doi-asserted-by":"publisher","unstructured":"Godard, C., Mac Aodha, O., Firman, M., Brostow, G.: Digging into self-supervised monocular depth estimation. In: 2019 IEEE\/CVF Int. Conf. Comput. Vis. (ICCV 2019). 3827\u20133837 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00393","DOI":"10.1109\/ICCV.2019.00393"},{"key":"1640_CR32","doi-asserted-by":"crossref","unstructured":"Zhan, H., et al.: Unsupervised learning of monocular depth estimation and visual odometry with deep feature reconstruction. In: Proceedings of the IEEE conference on computer vision and pattern recognition. (2018)","DOI":"10.1109\/CVPR.2018.00043"},{"issue":"4","key":"1640_CR33","doi-asserted-by":"publisher","first-page":"7791","DOI":"10.1109\/LRA.2021.3101049","volume":"6","author":"V Kaushik","year":"2021","unstructured":"Kaushik, V., Jindgar, K., Lall, B.: ADAADepth: Adapting data augmentation and attention for self-supervised monocular depth estimation. IEEE Rob. Autom. Lett. 6(4), 7791\u20137798 (2021). https:\/\/doi.org\/10.1109\/LRA.2021.3101049","journal-title":"IEEE Rob. Autom. Lett."},{"key":"1640_CR34","doi-asserted-by":"publisher","first-page":"2710","DOI":"10.1109\/TNNLS.2021.3107362","volume":"34","author":"J Zhang","year":"2021","unstructured":"Zhang, J., et al.: Dpsnet: Multitask learning using geometry reasoning for scene depth and semantics. IEEE Trans. Neural Networks Learn. Syst. 34, 2710\u20132721 (2021)","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"key":"1640_CR35","doi-asserted-by":"crossref","unstructured":"Liu, J., et al.: Mono-ViFI: A Unified Learning Framework for Self-supervised Single-and Multi-frame Monocular Depth Estimation. arXiv preprint arXiv:2407.14126 (2024)","DOI":"10.1007\/978-3-031-72995-9_6"},{"key":"1640_CR36","doi-asserted-by":"crossref","unstructured":"Vincent, J., et al.: Dynamic object tracking and masking for visual SLAM. In: 2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE, (2020)","DOI":"10.1109\/IROS45743.2020.9340958"},{"key":"1640_CR37","doi-asserted-by":"publisher","unstructured":"Zhao, S., Fu, H., Gong, M., Tao, D.: Geometry-aware symmetric domain adaptation for monocular depth estimation. In: 2019 IEEE\/CVF Conf. Comput. Vis. PATTERN Recognit. (CVPR 2019). pp 9780\u20139790 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.01002","DOI":"10.1109\/CVPR.2019.01002"},{"key":"1640_CR38","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1016\/j.neucom.2020.11.002","volume":"423","author":"Z Lei","year":"2021","unstructured":"Lei, Z., Wang, Y., Li, Z., Yang, J.: Attention based multilayer feature fusion convolutional neural network for unsupervised monocular depth estimation. Neurocomputing. 423, 343\u2013352 (2021). https:\/\/doi.org\/10.1016\/j.neucom.2020.11.002","journal-title":"Neurocomputing"},{"key":"1640_CR39","doi-asserted-by":"publisher","unstructured":"Garg, R., VijayKumar, B.G., Carneiro, G., Reid, I.: Unsupervised CNN for single view depth estimation: Geometry to the Rescue, COMPUTER VISION - ECCV 2016, PT VIII, vol. 9912, pp. 740\u2013756, (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_45","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1640_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, N., et al.: Lite-mono: A lightweight cnn and transformer architecture for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. (2023)","DOI":"10.1109\/CVPR52729.2023.01778"},{"issue":"6","key":"1640_CR41","doi-asserted-by":"publisher","first-page":"1583","DOI":"10.1007\/s13042-020-01251-y","volume":"12","author":"Y Chen","year":"2021","unstructured":"Chen, Y., Zhao, H., Hu, Z., Peng, J.: Attention-based context aggregation network for monocular depth estimation. Int. J. Mach. Learn. Cybernet. 12(6), 1583\u20131596 (2021). https:\/\/doi.org\/10.1007\/s13042-020-01251-y","journal-title":"Int. J. Mach. Learn. Cybernet."},{"key":"1640_CR42","doi-asserted-by":"publisher","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., Tao, D.: Deep ordinal regression network for monocular depth estimation. In: 2018 IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR 2018). pp 2002\u20132011 (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00214","DOI":"10.1109\/CVPR.2018.00214"},{"issue":"5","key":"1640_CR43","doi-asserted-by":"publisher","first-page":"1147","DOI":"10.1109\/TRO.2015.2463671","volume":"31","author":"R Mur-Artal","year":"2015","unstructured":"Mur-Artal, R., Montiel, J.M.M., Tardos, J.D.: ORB-SLAM: A versatile and Accurate Monocular SLAM System. IEEE T Robot. 31(5), 1147\u20131163 (2015). https:\/\/doi.org\/10.1109\/TRO.2015.2463671","journal-title":"IEEE T Robot"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01640-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01640-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01640-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,18]],"date-time":"2025-01-18T10:28:53Z","timestamp":1737196133000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01640-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,28]]},"references-count":43,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["1640"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01640-1","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,28]]},"assertion":[{"value":"26 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 October 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 November 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 November 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"11"}}