{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T15:36:53Z","timestamp":1776785813737,"version":"3.51.2"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T00:00:00Z","timestamp":1723248000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T00:00:00Z","timestamp":1723248000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100016808","name":"Natural Science Foundation of Xiamen Municipality","doi-asserted-by":"publisher","award":["2023J011437"],"award-info":[{"award-number":["2023J011437"]}],"id":[{"id":"10.13039\/100016808","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100015916","name":"Fujian Province Key Laboratory of Special Aquatic Formula Feed","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100015916","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. 
Syst."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s40747-024-01575-0","type":"journal-article","created":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T09:02:16Z","timestamp":1723280536000},"page":"7927-7941","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Repmono: a lightweight self-supervised monocular depth estimation architecture for high-speed inference"],"prefix":"10.1007","volume":"10","author":[{"given":"Guowei","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Xincheng","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Li","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Huankang","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Teng","family":"Fei","sequence":"additional","affiliation":[]},{"given":"Hulin","family":"Tang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2083-2680","authenticated-orcid":false,"given":"Shangfeng","family":"Jiang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,10]]},"reference":[{"key":"1575_CR1","doi-asserted-by":"crossref","unstructured":"Menze M, Geiger A (2015) Object scene flow for autonomous vehicles. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3061\u20133070","DOI":"10.1109\/CVPR.2015.7298925"},{"issue":"2","key":"1575_CR2","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1109\/34.982903","volume":"24","author":"GN DeSouza","year":"2002","unstructured":"DeSouza GN, Kak AC (2002) Vision for mobile robot navigation: a survey. 
IEEE Trans Pattern Anal Mach Intell 24(2):237\u2013267","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1575_CR3","doi-asserted-by":"publisher","first-page":"7","DOI":"10.4108\/airo.v1i1.2709","volume":"1","author":"G Xu","year":"2022","unstructured":"Xu G, Khan A, Moshayedi AJ, Zhang X, Shuxin Y (2022) The object detection, perspective and obstacles in robotic: a review. EAI Endorsed Trans AI Robot 1:7\u201315. https:\/\/doi.org\/10.4108\/airo.v1i1.2709","journal-title":"EAI Endorsed Trans AI Robot"},{"key":"1575_CR4","doi-asserted-by":"crossref","unstructured":"Newcombe RA, Lovegrove SJ, Davison AJ (2011) DTAM: dense tracking and mapping in real-time. In: 2011 International conference on computer vision. IEEE, pp 2320\u20132327","DOI":"10.1109\/ICCV.2011.6126513"},{"issue":"9","key":"1575_CR5","doi-asserted-by":"publisher","first-page":"1612","DOI":"10.1007\/s11431-020-1582-8","volume":"63","author":"C Zhao","year":"2020","unstructured":"Zhao C, Sun Q, Zhang C, Tang Y, Qian F (2020) Monocular depth estimation based on deep learning: an overview. Sci China Technol Sci 63(9):1612\u20131627","journal-title":"Sci China Technol Sci"},{"key":"1575_CR6","doi-asserted-by":"crossref","unstructured":"Dai Z, Cai B, Lin Y, Chen J (2021) UP-DETR: unsupervised pre-training for object detection with transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1601\u20131610","DOI":"10.1109\/CVPR46437.2021.00165"},{"key":"1575_CR7","doi-asserted-by":"crossref","unstructured":"Zhao C, Zhang Y, Poggi M, Tosi F, Guo X, Zhu Z, Huang G, Tang Y, Mattoccia S (2022) Monovit: self-supervised monocular depth estimation with a vision transformer. In: 2022 International conference on 3D vision (3DV). 
IEEE, pp 668\u2013678","DOI":"10.1109\/3DV57658.2022.00077"},{"key":"1575_CR8","doi-asserted-by":"crossref","unstructured":"Varma A, Chawla H, Zonooz B, Arani E (2022) Transformers in self-supervised monocular depth estimation with unknown camera intrinsics. CoRR abs\/2202.03131. arXiv:2202.03131","DOI":"10.5220\/0010884000003124"},{"key":"1575_CR9","doi-asserted-by":"crossref","unstructured":"Zhang N, Nex F, Vosselman G, Kerle N (2023) Lite-mono: a lightweight CNN and transformer architecture for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 18537\u201318546","DOI":"10.1109\/CVPR52729.2023.01778"},{"key":"1575_CR10","doi-asserted-by":"publisher","unstructured":"Moshayedi AJ, Khan AS, Yang S, Zanjani SM (2022) Personal image classifier based handy pipe defect recognizer (hpd): Design and test. In: 2022 7th International conference on intelligent computing and signal processing (ICSP), pp 1721\u20131728. https:\/\/doi.org\/10.1109\/ICSP54964.2022.9778676","DOI":"10.1109\/ICSP54964.2022.9778676"},{"key":"1575_CR11","doi-asserted-by":"publisher","DOI":"10.3390\/s23208422","author":"AJ Moshayedi","year":"2023","unstructured":"Moshayedi AJ, Uddin NMI, Khan AS, Zhu J, Emadi Andani M (2023) Designing and developing a vision-based system to investigate the emotional effects of news on short sleep at noon: an experimental case study. Sensors. https:\/\/doi.org\/10.3390\/s23208422","journal-title":"Sensors"},{"key":"1575_CR12","doi-asserted-by":"crossref","unstructured":"Yu W, Luo M, Zhou P, Si C, Zhou Y, Wang X, Feng J, Yan S (2022) Metaformer is actually what you need for vision. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10819\u201310829","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"1575_CR13","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"issue":"11","key":"1575_CR14","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger A, Lenz P, Stiller C, Urtasun R (2013) Vision meets robotics: the Kitti dataset. Int J Robot Res 32(11):1231\u20131237","journal-title":"Int J Robot Res"},{"issue":"5","key":"1575_CR15","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2008.132","volume":"31","author":"A Saxena","year":"2008","unstructured":"Saxena A, Sun M, Ng AY (2008) Make3D: learning 3D scene structure from a single still image. IEEE Trans Pattern Anal Mach Intell 31(5):824\u2013840","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1575_CR16","doi-asserted-by":"crossref","unstructured":"Yang G, Song X, Huang C, Deng Z, Shi J, Zhou B (2019) Drivingstereo: a large-scale dataset for stereo matching in autonomous driving scenarios. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 899\u2013908","DOI":"10.1109\/CVPR.2019.00099"},{"key":"1575_CR17","unstructured":"Eigen D, Puhrsch C, Fergus R (2014) Depth map prediction from a single image using a multi-scale deep network. Adv Neural Inf Process Syst 27"},{"key":"1575_CR18","unstructured":"Li B, Shen C, Dai Y, Van Den\u00a0Hengel A, He M (2015) Depth and surface normal estimation from monocular images using regression on deep features and hierarchical CRFs. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1119\u20131127"},{"issue":"10","key":"1575_CR19","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu F, Shen C, Lin G, Reid I (2015) Learning depth from single monocular images using deep convolutional neural fields. 
IEEE Trans Pattern Anal Mach Intell 38(10):2024\u20132039","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1575_CR20","doi-asserted-by":"crossref","unstructured":"Xu D, Ricci E, Ouyang W, Wang X, Sebe N (2017) Multi-scale continuous CRFs as sequential deep networks for monocular depth estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5354\u20135362","DOI":"10.1109\/CVPR.2017.25"},{"key":"1575_CR21","doi-asserted-by":"crossref","unstructured":"Garg R, Bg VK, Carneiro G, Reid I (2016) Unsupervised CNN for single view depth estimation: Geometry to the rescue. In: Computer vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part VIII 14. Springer, pp 740\u2013756","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1575_CR22","doi-asserted-by":"crossref","unstructured":"Godard C, Mac\u00a0Aodha O, Brostow GJ (2017) Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 270\u2013279","DOI":"10.1109\/CVPR.2017.699"},{"key":"1575_CR23","doi-asserted-by":"crossref","unstructured":"Zhou T, Brown M, Snavely N, Lowe DG (2017) Unsupervised learning of depth and ego-motion from video. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1851\u20131858","DOI":"10.1109\/CVPR.2017.700"},{"key":"1575_CR24","unstructured":"Vijayanarasimhan S, Ricco S, Schmid C, Sukthankar R, Fragkiadaki K (2017) Sfm-net: learning of structure and motion from video. arXiv preprint arXiv:1704.07804"},{"key":"1575_CR25","doi-asserted-by":"crossref","unstructured":"Guizilini V, Hou R, Li J, Ambrus R, Gaidon A (2020) Semantically-guided representation learning for self-supervised monocular depth. 
arXiv preprint arXiv:2002.12319","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"1575_CR26","doi-asserted-by":"crossref","unstructured":"Yin Z, Shi J (2018) Geonet: Unsupervised learning of dense depth, optical flow and camera pose. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1983\u20131992","DOI":"10.1109\/CVPR.2018.00212"},{"key":"1575_CR27","doi-asserted-by":"crossref","unstructured":"Godard C, Mac\u00a0Aodha O, Firman M, Brostow GJ (2019) Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3828\u20133838","DOI":"10.1109\/ICCV.2019.00393"},{"key":"1575_CR28","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1575_CR29","unstructured":"Zhou H, Greenwood D, Taylor S (2021) Self-supervised monocular depth estimation with internal feature fusion. arXiv preprint arXiv:2110.09482"},{"key":"1575_CR30","doi-asserted-by":"crossref","unstructured":"Guizilini V, Ambrus R, Pillai S, Raventos A, Gaidon A (2020) 3d packing for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2485\u20132494","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"1575_CR31","doi-asserted-by":"crossref","unstructured":"Yan J, Zhao H, Bu P, Jin Y (2021) Channel-wise attention-based network for self-supervised monocular depth estimation. In: 2021 International conference on 3D vision (3DV). 
IEEE, pp 464\u2013473","DOI":"10.1109\/3DV53792.2021.00056"},{"issue":"9","key":"1575_CR32","doi-asserted-by":"publisher","first-page":"2567","DOI":"10.1007\/s00371-021-02206-2","volume":"37","author":"Y Li","year":"2021","unstructured":"Li Y, Luo F, Li W, Zheng S, Wu H-H, Xiao C (2021) Self-supervised monocular depth estimation based on image texture detail enhancement. Vis Comput 37(9):2567\u20132580","journal-title":"Vis Comput"},{"key":"1575_CR33","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"1575_CR34","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101"},{"key":"1575_CR35","doi-asserted-by":"crossref","unstructured":"Hoang V-T, Jo K-H (2019) Pydnet: an efficient CNN architecture with pyramid depthwise convolution kernels. In: 2019 International conference on system science and engineering (ICSSE). IEEE, pp 154\u2013158","DOI":"10.1109\/ICSSE.2019.8823302"},{"key":"1575_CR36","doi-asserted-by":"crossref","unstructured":"Wofk D, Ma F, Yang T-J, Karaman S, Sze V (2019) Fastdepth: fast monocular depth estimation on embedded systems. In: 2019 International conference on robotics and automation (ICRA). IEEE, pp 6101\u20136108","DOI":"10.1109\/ICRA.2019.8794182"},{"key":"1575_CR37","doi-asserted-by":"crossref","unstructured":"Ding X, Zhang X, Ma N, Han J, Ding G, Sun J (2021) Repvgg: Making vgg-style convnets great again. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13733\u201313742","DOI":"10.1109\/CVPR46437.2021.01352"},{"key":"1575_CR38","doi-asserted-by":"crossref","unstructured":"Ding X, Zhang X, Han J, Ding G (2021) Diverse branch block: building a convolution as an inception-like unit. 
CoRR abs\/2103.13425. arXiv:2103.13425","DOI":"10.1109\/CVPR46437.2021.01074"},{"key":"1575_CR39","doi-asserted-by":"crossref","unstructured":"Lyu X, Liu L, Wang M, Kong X, Liu L, Liu Y, Chen X, Yuan Y (2021) Hr-depth: high resolution self-supervised monocular depth estimation. In: Proceedings of the AAAI conference on artificial intelligence, vol 35, pp 2294\u20132301","DOI":"10.1609\/aaai.v35i3.16329"},{"key":"1575_CR40","unstructured":"Jaderberg M, Simonyan K, Zisserman A et al (2015) Spatial transformer networks. Adv Neural Inf Process Syst 28"},{"issue":"4","key":"1575_CR41","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang Z, Bovik AC, Sheikh HR, Simoncelli EP (2004) Image quality assessment: from error visibility to structural similarity. IEEE Trans Image Process 13(4):600\u2013612","journal-title":"IEEE Trans Image Process"},{"key":"1575_CR42","doi-asserted-by":"crossref","unstructured":"Uhrig J, Schneider N, Schneider L, Franke U, Brox T, Geiger A (2017) Sparsity invariant CNNs. In: 2017 International conference on 3D vision (3DV). IEEE, pp 11\u201320","DOI":"10.1109\/3DV.2017.00012"},{"key":"1575_CR43","doi-asserted-by":"crossref","unstructured":"Wang C, Buenaposada JM, Zhu R, Lucey S (2018) Learning depth from monocular videos using direct methods. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2022\u20132030","DOI":"10.1109\/CVPR.2018.00216"},{"key":"1575_CR44","doi-asserted-by":"crossref","unstructured":"Casser V, Pirk S, Mahjourian R, Angelova A (2019) Depth prediction without the sensors: Leveraging structure for unsupervised learning from monocular videos. 
In: Proceedings of the AAAI conference on artificial intelligence, vol 33, pp 8001\u20138008","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"1575_CR45","doi-asserted-by":"crossref","unstructured":"Klingner M, Term\u00f6hlen J-A, Mikolajczyk J, Fingscheidt T (2020) Self-supervised monocular depth estimation: solving the dynamic object problem by semantic guidance. In: Computer vision\u2014ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XX 16. Springer, pp 582\u2013600","DOI":"10.1007\/978-3-030-58565-5_35"},{"key":"1575_CR46","doi-asserted-by":"crossref","unstructured":"Zhou Z, Fan X, Shi P, Xin Y (2021) R-msfm: recurrent multi-scale feature modulation for monocular depth estimating. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 12777\u201312786","DOI":"10.1109\/ICCV48922.2021.01254"},{"issue":"5","key":"1575_CR47","doi-asserted-by":"publisher","first-page":"2023","DOI":"10.1109\/TNNLS.2021.3100895","volume":"33","author":"Q Sun","year":"2021","unstructured":"Sun Q, Tang Y, Zhang C, Zhao C, Qian F, Kurths J (2021) Unsupervised estimation of monocular depth and VO in dynamic environments via hybrid masks. 
IEEE Trans Neural Netw Learn Syst 33(5):2023\u20132033","journal-title":"IEEE Trans Neural Netw Learn Syst"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01575-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-024-01575-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01575-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,16]],"date-time":"2024-10-16T22:14:14Z","timestamp":1729116854000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-024-01575-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,10]]},"references-count":47,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1575"],"URL":"https:\/\/doi.org\/10.1007\/s40747-024-01575-0","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,10]]},"assertion":[{"value":"30 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or 
personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}