{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T19:06:09Z","timestamp":1761419169398,"version":"build-2065373602"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"14","license":[{"start":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T00:00:00Z","timestamp":1758499200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T00:00:00Z","timestamp":1758499200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Doctoral Scientific Research Foundation of Hubei University of Automotive Technology","award":["BK202102"],"award-info":[{"award-number":["BK202102"]}]},{"name":"Foundation of Hubei Educational Committee","award":["Q20211802"],"award-info":[{"award-number":["Q20211802"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11760-025-04802-4","type":"journal-article","created":{"date-parts":[[2025,9,22]],"date-time":"2025-09-22T13:13:03Z","timestamp":1758546783000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["LEDepth: A Lightweight Self-Supervised Monocular Depth Estimation Network Combining CNN and Transformer"],"prefix":"10.1007","volume":"19","author":[{"given":"Lei","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chengzhi","family":"Lyu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pan","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hong","family":"He","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fang","family":"Dai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,22]]},"reference":[{"key":"4802_CR1","first-page":"20014","volume":"34","author":"A Ali","year":"2021","unstructured":"Ali, A., Touvron, H., Caron, M., et al.: Xcit: cross-covariance image transformers. Adv. Neural. Inf. Process. Syst. 34, 20014\u201320027 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"1","key":"4802_CR2","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1609\/aaai.v37i1.25090","volume":"37","author":"J Bae","year":"2023","unstructured":"Bae, J., Moon, S., Im, S.: Deep digging into the generalization of self-supervised monocular depth estimation. Proc AAAI conf artificial intelligence 37(1), 187\u2013196 (2023)","journal-title":"Proc AAAI conf artificial intelligence"},{"key":"4802_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2024.109313","volume":"138","author":"Z Cheng","year":"2024","unstructured":"Cheng, Z., Zhang, Y., Yu, Y., et al.: Tinydepth: lightweight self-supervised monocular depth estimation based on transformer. Eng. Appl. Artif. Intell. 138, 109313 (2024)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"4802_CR4","doi-asserted-by":"crossref","unstructured":"Dai, Z., Cai, B., Lin, Y., et\u00a0al.: (2021) Up-detr: Unsupervised pre-training for object detection with transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1601\u20131610","DOI":"10.1109\/CVPR46437.2021.00165"},{"key":"4802_CR5","unstructured":"Dosovitskiy, A.: (2020) An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"4802_CR6","unstructured":"Eigen, D., Puhrsch ,C., Fergus, R.: (2014) Depth map prediction from a single image using a multi-scale deep network. Advances in neural information processing systems 27"},{"issue":"11","key":"4802_CR7","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger, A., Lenz, P., Stiller, C., et al.: Vision meets robotics: the kitti dataset. Int. J. Robotics. Res 32(11), 1231\u20131237 (2013)","journal-title":"Int. J. Robotics. Res"},{"key":"4802_CR8","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Firman, M., et\u00a0al.: (2019) Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3828\u20133838","DOI":"10.1109\/ICCV.2019.00393"},{"key":"4802_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., et\u00a0al.: (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"4802_CR10","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"4802_CR11","doi-asserted-by":"publisher","first-page":"1097","DOI":"10.1109\/TIP.2021.3139243","volume":"31","author":"A Karaali","year":"2022","unstructured":"Karaali, A., Harte, N., Jung, C.R.: Deep multi-scale feature learning for defocus blur estimation. IEEE Trans. Image Process. 31, 1097\u20131106 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"4802_CR12","doi-asserted-by":"crossref","unstructured":"Li, S., Lyu, C., Xia, B., et al.: (2024) TAMdepth: self-supervised monocular depth estimation with transformer and adapter modulation. The Visual Computer pp 1\u201312","DOI":"10.1007\/s00371-024-03332-3"},{"issue":"Suppl 2","key":"4802_CR13","doi-asserted-by":"publisher","first-page":"S206","DOI":"10.3103\/S1060992X23060103","volume":"32","author":"S Linok","year":"2023","unstructured":"Linok, S., Yudin, D.: Influence of neural network receptive field on monocular depth and ego-motion estimation. Optical Memory Neural Networks 32(Suppl 2), S206\u2013S213 (2023)","journal-title":"Optical Memory Neural Networks"},{"key":"4802_CR14","unstructured":"Loshchilov, I.: (2017) Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101"},{"key":"4802_CR15","doi-asserted-by":"crossref","unstructured":"Lyu, X., Liu, L., Wang, M., et\u00a0al.: (2021) Hr-depth: High resolution self-supervised monocular depth estimation. In: Proceedings of the AAAI conference on artificial intelligence, pp 2294\u20132301","DOI":"10.1609\/aaai.v35i3.16329"},{"key":"4802_CR16","unstructured":"Saxena, A., Chung, S., Ng, A.: (2005) Learning depth from single monocular images. Advances in neural information processing systems 18"},{"issue":"5","key":"4802_CR17","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2008.132","volume":"31","author":"A Saxena","year":"2008","unstructured":"Saxena, A., Sun, M., Ng, A.Y.: Make3d: learning 3d scene structure from a single still image. IEEE Trans. Pattern Anal. Mach. Intell. 31(5), 824\u2013840 (2008)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"11","key":"4802_CR18","doi-asserted-by":"publisher","first-page":"4381","DOI":"10.1109\/TCSVT.2021.3049869","volume":"31","author":"M Song","year":"2021","unstructured":"Song, M., Lim, S., Kim, W.: Monocular depth estimation using laplacian pyramid-based depth residuals. IEEE Trans. Circuits Syst. Video Technol. 31(11), 4381\u20134393 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"4802_CR19","volume-title":"Er-depth: Enhancing the robustness of self-supervised monocular depth estimation in challenging scenes","author":"Z Song","year":"2025","unstructured":"Song, Z., Zhu, R., Wang, J., et al.: Er-depth: Enhancing the robustness of self-supervised monocular depth estimation in challenging scenes. ACM Transactions on Multimedia Computing, Communications and Applications (2025)"},{"issue":"5","key":"4802_CR20","doi-asserted-by":"publisher","first-page":"2023","DOI":"10.1109\/TNNLS.2021.3100895","volume":"33","author":"Q Sun","year":"2021","unstructured":"Sun, Q., Tang, Y., Zhang, C., et al.: Unsupervised estimation of monocular depth and vo in dynamic environments via hybrid masks. IEEE Trans. Neural Netw. Learn. Syst. 33(5), 2023\u20132033 (2021)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"4802_CR21","doi-asserted-by":"crossref","unstructured":"Wang, C., Buenaposada, J.M., Zhu, R., et\u00a0al.: (2018) Learning depth from monocular videos using direct methods. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2022\u20132030","DOI":"10.1109\/CVPR.2018.00216"},{"issue":"4","key":"4802_CR22","doi-asserted-by":"publisher","first-page":"4978","DOI":"10.1109\/JSEN.2023.3347585","volume":"24","author":"X Wang","year":"2024","unstructured":"Wang, X., Yu, M., Wang, H., et al.: Self-supervised monocular depth estimation based on high-order spatial interactions. IEEE Sens. J. 24(4), 4978\u20134991 (2024)","journal-title":"IEEE Sens. J."},{"issue":"4","key":"4802_CR23","doi-asserted-by":"publisher","first-page":"991","DOI":"10.1007\/s11760-022-02303-2","volume":"17","author":"Z Xie","year":"2023","unstructured":"Xie, Z., Zhou, S., Zheng, M., et al.: Research on self-supervised depth estimation algorithm of driving scene based on monocular vision. SIViP 17(4), 991\u2013999 (2023)","journal-title":"SIViP"},{"key":"4802_CR24","doi-asserted-by":"publisher","first-page":"678","DOI":"10.1109\/LSP.2021.3067498","volume":"28","author":"X Xu","year":"2021","unstructured":"Xu, X., Chen, Z., Yin, F.: Monocular depth estimation with multi-scale feature fusion. IEEE Signal Process. Lett. 28, 678\u2013682 (2021)","journal-title":"IEEE Signal Process. Lett."},{"key":"4802_CR25","doi-asserted-by":"crossref","unstructured":"Yan, J., Zhao, H., Bu, P., et\u00a0al.: (2021) Channel-wise attention-based network for self-supervised monocular depth estimation. In: 2021 International Conference on 3D vision (3DV), IEEE, pp 464\u2013473","DOI":"10.1109\/3DV53792.2021.00056"},{"key":"4802_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, N., Nex, F., Vosselman, G., et\u00a0al.: (2023) Lite-mono: A lightweight cnn and transformer architecture for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 18537\u201318546","DOI":"10.1109\/CVPR52729.2023.01778"},{"key":"4802_CR27","doi-asserted-by":"crossref","unstructured":"Zhao, B., He, H., Xu, H., et al.: Rtia-mono: real-time lightweight self-supervised monocular depth estimation with global-local information aggregation. Digital Signal. Process. 156, 104769 (2025)","DOI":"10.1016\/j.dsp.2024.104769"},{"key":"4802_CR28","doi-asserted-by":"crossref","unstructured":"Zhao, C., Zhang, Y., Poggi, M., et\u00a0al.: (2022) Monovit: Self-supervised monocular depth estimation with a vision transformer. In: 2022 international conference on 3D vision (3DV), IEEE, pp 668\u2013678","DOI":"10.1109\/3DV57658.2022.00077"},{"issue":"17","key":"4802_CR29","doi-asserted-by":"publisher","first-page":"19747","DOI":"10.1109\/JSEN.2023.3296497","volume":"23","author":"M Zheng","year":"2023","unstructured":"Zheng, M., Luo, L., Zheng, H., et al.: (2023) A dual encoder-decoder network for self-supervised monocular depth estimation. IEEE .Sens. J. 23(17), 19747\u201319756 (2023)","journal-title":"IEEE .Sens. J."},{"key":"4802_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2024.104032","volume":"245","author":"D Zhou","year":"2024","unstructured":"Zhou, D., Zhang, M., Gao, X., et al.: Complete contextual information extraction for self-supervised monocular depth estimation. Comput. Vis. Image Underst. 245, 104032 (2024)","journal-title":"Comput. Vis. Image Underst."},{"key":"4802_CR31","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., et\u00a0al.: (2017) Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1851\u20131858","DOI":"10.1109\/CVPR.2017.700"},{"key":"4802_CR32","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Fan, X., Shi, P., et\u00a0al.: (2021) R-msfm: Recurrent multi-scale feature modulation for monocular depth estimating. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12777\u201312786","DOI":"10.1109\/ICCV48922.2021.01254"},{"issue":"12","key":"4802_CR33","doi-asserted-by":"publisher","first-page":"9551","DOI":"10.1109\/TPAMI.2024.3420165","volume":"46","author":"Z Zhou","year":"2024","unstructured":"Zhou, Z., Fan, X., Shi, P., et al.: Recurrent multiscale feature modulation for geometry consistent depth learning. IEEE Trans. Pattern Anal. Mach. Intell. 46(12), 9551\u20139566 (2024)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04802-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04802-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04802-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T18:58:53Z","timestamp":1761418733000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04802-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,22]]},"references-count":33,"journal-issue":{"issue":"14","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4802"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04802-4","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"type":"print","value":"1863-1703"},{"type":"electronic","value":"1863-1711"}],"subject":[],"published":{"date-parts":[[2025,9,22]]},"assertion":[{"value":"3 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 September 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 September 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"1208"}}