{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T20:13:19Z","timestamp":1773519199406,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,2,14]],"date-time":"2025-02-14T00:00:00Z","timestamp":1739491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,14]],"date-time":"2025-02-14T00:00:00Z","timestamp":1739491200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2023M741411"],"award-info":[{"award-number":["2023M741411"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Postdoctoral Fellowship Program of CPSF","award":["GZC20240608"],"award-info":[{"award-number":["GZC20240608"]}]},{"name":"Jiangsu Funding Program for Excellent Postdoctoral Talent","award":["2024ZB488"],"award-info":[{"award-number":["2024ZB488"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62173160"],"award-info":[{"award-number":["62173160"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s00530-025-01700-0","type":"journal-article","created":{"date-parts":[[2025,2,14]],"date-time":"2025-02-14T19:17:16Z","timestamp":1739560636000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Self-supervised monocular depth estimation via multiple bilateral consistency"],"prefix":"10.1007","volume":"31","author":[{"given":"Zhengyang","family":"Lu","sequence":"first","affiliation":[]},{"given":"Ying","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,14]]},"reference":[{"key":"1700_CR1","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"1700_CR2","doi-asserted-by":"crossref","unstructured":"Hu, Q., Yang, B., Khalid, S., Xiao, W., Trigoni, N., Markham, A.: Towards semantic segmentation of urban-scale 3d point clouds: a dataset, benchmarks and challenges. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4977\u20134987 (2021)","DOI":"10.1109\/CVPR46437.2021.00494"},{"key":"1700_CR3","doi-asserted-by":"publisher","first-page":"626","DOI":"10.1016\/j.neucom.2022.01.005","volume":"493","author":"Y Mo","year":"2022","unstructured":"Mo, Y., Wu, Y., Yang, X., Liu, F., Liao, Y.: Review the state-of-the-art technologies of semantic segmentation based on deep learning. Neurocomputing 493, 626\u2013646 (2022)","journal-title":"Neurocomputing"},{"issue":"1","key":"1700_CR4","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1145\/2398356.2398381","volume":"56","author":"J Shotton","year":"2013","unstructured":"Shotton, J., Sharp, T., Kipman, A., Fitzgibbon, A., Finocchio, M., Blake, A., Cook, M., Moore, R.: Real-time human pose recognition in parts from single depth images. Commun. ACM 56(1), 116\u2013124 (2013)","journal-title":"Commun. ACM"},{"key":"1700_CR5","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. In: Advances in Neural Information Processing Systems, vol. 27 (2014)"},{"key":"1700_CR6","doi-asserted-by":"crossref","unstructured":"Shelhamer, E., Barron, J.T., Darrell, T.: Scene intrinsics and depth from a single image. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 37\u201344 (2015)","DOI":"10.1109\/ICCVW.2015.39"},{"key":"1700_CR7","doi-asserted-by":"crossref","unstructured":"Laina, I., Rupprecht, C., Belagiannis, V., Tombari, F., Navab, N.: Deeper depth prediction with fully convolutional residual networks. In: 2016 Fourth International Conference on 3D Vision (3DV), pp. 239\u2013248 (2016). IEEE","DOI":"10.1109\/3DV.2016.32"},{"key":"1700_CR8","doi-asserted-by":"crossref","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., Tao, D.: Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2002\u20132011 (2018)","DOI":"10.1109\/CVPR.2018.00214"},{"issue":"10","key":"1700_CR9","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu, F., Shen, C., Lin, G., Reid, I.: Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans. Pattern Anal. Mach. Intell. 38(10), 2024\u20132039 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"8","key":"1700_CR10","doi-asserted-by":"publisher","first-page":"3691","DOI":"10.1109\/TIP.2018.2821979","volume":"27","author":"Z Zhang","year":"2018","unstructured":"Zhang, Z., Xu, C., Yang, J., Gao, J., Cui, Z.: Progressive hard-mining network for monocular depth estimation. IEEE Trans. Image Process. 27(8), 3691\u20133702 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"1700_CR11","doi-asserted-by":"crossref","unstructured":"Li, B., Shen, C., Dai, Y., Van Den\u00a0Hengel, A., He, M.: Depth and surface normal estimation from monocular images using regression on deep features and hierarchical CRFS. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1119\u20131127 (2015)","DOI":"10.1109\/CVPR.2015.7298715"},{"key":"1700_CR12","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The kitti vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3354\u20133361. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"1700_CR13","doi-asserted-by":"publisher","first-page":"100057","DOI":"10.1016\/j.array.2021.100057","volume":"10","author":"A Gupta","year":"2021","unstructured":"Gupta, A., Anpalagan, A., Guan, L., Khwaja, A.S.: Deep learning for object detection and scene perception in self-driving cars: survey, challenges, and open issues. Array 10, 100057 (2021)","journal-title":"Array"},{"key":"1700_CR14","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Brostow, G.J.: Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 270\u2013279 (2017)","DOI":"10.1109\/CVPR.2017.699"},{"key":"1700_CR15","doi-asserted-by":"crossref","unstructured":"Zhou, T., Brown, M., Snavely, N., Lowe, D.G.: Unsupervised learning of depth and ego-motion from video. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.700"},{"key":"1700_CR16","doi-asserted-by":"crossref","unstructured":"Yin, Z., Shi, J.: Geonet: Unsupervised learning of dense depth, optical flow and camera pose. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1983\u20131992 (2018)","DOI":"10.1109\/CVPR.2018.00212"},{"issue":"11","key":"1700_CR17","doi-asserted-by":"publisher","first-page":"4037","DOI":"10.1109\/TPAMI.2020.2992393","volume":"43","author":"L Jing","year":"2020","unstructured":"Jing, L., Tian, Y.: Self-supervised visual feature learning with deep neural networks: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 43(11), 4037\u20134058 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1700_CR18","doi-asserted-by":"crossref","unstructured":"Casser, V., Pirk, S., Mahjourian, R., Angelova, A.: Depth prediction without the sensors: leveraging structure for unsupervised learning from monocular videos. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 8001\u20138008 (2019)","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"1700_CR19","doi-asserted-by":"crossref","unstructured":"Wong, A., Soatto, S.: Bilateral cyclic constraint and adaptive regularization for unsupervised monocular depth prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5644\u20135653 (2019)","DOI":"10.1109\/CVPR.2019.00579"},{"key":"1700_CR20","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Firman, M., Brostow, G.J.: Digging into self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3828\u20133838 (2019)","DOI":"10.1109\/ICCV.2019.00393"},{"key":"1700_CR21","doi-asserted-by":"crossref","unstructured":"Guizilini, V., Ambrus, R., Pillai, S., Raventos, A., Gaidon, A.: 3d packing for self-supervised monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2485\u20132494 (2020)","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"1700_CR22","doi-asserted-by":"crossref","unstructured":"Klingner, M., Term\u00f6hlen, J.-A., Mikolajczyk, J., Fingscheidt, T.: Self-supervised monocular depth estimation: solving the dynamic object problem by semantic guidance. In: European Conference on Computer Vision, pp. 582\u2013600. Springer, Berlin (2020)","DOI":"10.1007\/978-3-030-58565-5_35"},{"key":"1700_CR23","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Fan, X., Shi, P., Xin, Y.: R-msfm: Recurrent multi-scale feature modulation for monocular depth estimating. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12777\u201312786 (2021)","DOI":"10.1109\/ICCV48922.2021.01254"},{"key":"1700_CR24","doi-asserted-by":"publisher","first-page":"638","DOI":"10.1109\/LSP.2021.3065203","volume":"28","author":"K Li","year":"2021","unstructured":"Li, K., Fu, Z., Wang, H., Chen, Z., Guo, Y.: Adv-depth: self-supervised monocular depth estimation with an adversarial loss. IEEE Signal Process. Lett. 28, 638\u2013642 (2021)","journal-title":"IEEE Signal Process. Lett."},{"key":"1700_CR25","unstructured":"Bae, J., Moon, S., Im, S.: Monoformer: Towards Generalization of Self-Supervised Monocular Depth Estimation with Transformers. arXiv:2205.11083 (2022)"},{"key":"1700_CR26","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1016\/j.neucom.2019.12.108","volume":"389","author":"W Zhao","year":"2020","unstructured":"Zhao, W., Zhang, S., Guan, Z., Luo, H., Tang, L., Peng, J., Fan, J.: 6d object pose estimation via viewpoint relation reasoning. Neurocomputing 389, 9\u201317 (2020)","journal-title":"Neurocomputing"},{"key":"1700_CR27","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.neucom.2020.02.044","volume":"396","author":"X Ye","year":"2020","unstructured":"Ye, X., Ji, X., Sun, B., Chen, S., Wang, Z., Li, H.: DRM-SLAM: towards dense reconstruction of monocular slam with scene depth fusion. Neurocomputing 396, 76\u201391 (2020)","journal-title":"Neurocomputing"},{"issue":"4","key":"1700_CR28","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1109\/MRA.2011.943233","volume":"18","author":"D Scaramuzza","year":"2011","unstructured":"Scaramuzza, D., Fraundorfer, F.: Visual odometry [tutorial]. IEEE Robot. Autom. Mag. 18(4), 80\u201392 (2011)","journal-title":"IEEE Robot. Autom. Mag."},{"key":"1700_CR29","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., Kavukcuoglu, K.: Spatial Transformer Networks. arXiv:1506.02025 (2015)"},{"issue":"4","key":"1700_CR30","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"1700_CR31","doi-asserted-by":"crossref","unstructured":"Zhu, J.-Y., Park, T., Isola, P., Efros, A.A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2223\u20132232 (2017)","DOI":"10.1109\/ICCV.2017.244"},{"issue":"11","key":"1700_CR32","doi-asserted-by":"publisher","first-page":"2144","DOI":"10.1109\/TPAMI.2014.2316835","volume":"36","author":"K Karsch","year":"2014","unstructured":"Karsch, K., Liu, C., Kang, S.B.: Depth transfer: depth extraction from video using non-parametric sampling. IEEE Trans. Pattern Anal. Mach. Intell. 36(11), 2144\u20132158 (2014)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1700_CR33","first-page":"231","volume":"132","author":"A Geiger","year":"2013","unstructured":"Geiger, A., Lenz, P., Stiller, C., Urtasun, R.: Vision meets robotics: the kitti dataset. Int. J. Robot. Res. 132, 231\u20131237 (2013)","journal-title":"Int. J. Robot. Res."},{"key":"1700_CR34","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et\u00a0al.: Pytorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"1700_CR35","unstructured":"Simonyan, K., Zisserman, A.: Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"1700_CR36","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01700-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-01700-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-01700-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,21]],"date-time":"2025-04-21T19:34:57Z","timestamp":1745264097000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-01700-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,14]]},"references-count":36,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["1700"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-01700-0","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,14]]},"assertion":[{"value":"16 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"111"}}