{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T15:39:12Z","timestamp":1780501152245,"version":"3.54.1"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T00:00:00Z","timestamp":1717632000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T00:00:00Z","timestamp":1717632000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Natural Science Foundation of Shanghai","award":["22ZR1443700"],"award-info":[{"award-number":["22ZR1443700"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1007\/s00138-024-01560-0","type":"journal-article","created":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T11:03:12Z","timestamp":1717671792000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Chfnet: a coarse-to-fine hierarchical refinement model for monocular depth estimation"],"prefix":"10.1007","volume":"35","author":[{"given":"Han","family":"Chen","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yongxiong","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,6,6]]},"reference":[{"key":"1560_CR1","doi-asserted-by":"crossref","unstructured":"Chibane, J., Alldieck, T., Pons-Moll, G.: Implicit functions in feature space for 3d shape reconstruction and completion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6970\u20136981 (2020)","DOI":"10.1109\/CVPR42600.2020.00700"},{"issue":"3","key":"1560_CR2","doi-asserted-by":"publisher","first-page":"1341","DOI":"10.1109\/TITS.2020.2972974","volume":"22","author":"D Feng","year":"2020","unstructured":"Feng, D., Haase-Sch\u00fctz, C., Rosenbaum, L., Hertlein, H., Glaeser, C., Timm, F., Wiesbeck, W., Dietmayer, K.: Deep multi-modal object detection and semantic segmentation for autonomous driving: Datasets, methods, and challenges. IEEE Trans. Intell. Transp. Syst. 22(3), 1341\u20131360 (2020)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"1560_CR3","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1016\/j.isprsjprs.2020.01.008","volume":"161","author":"Q Li","year":"2020","unstructured":"Li, Q., Zhu, J., Liu, J., Cao, R., Fu, H., Garibaldi, J.M., Li, Q., Liu, B., Qiu, G.: 3d map-guided single indoor image localization refinement. ISPRS J. Photogramm. Remote Sens. 161, 13\u201326 (2020)","journal-title":"ISPRS J. Photogramm. Remote Sens."},{"key":"1560_CR4","doi-asserted-by":"crossref","unstructured":"Du, R., Turner, E., Dzitsiuk, M., Prasso, L., Duarte, I., Dourgarian, J., Afonso, J., Pascoal, J., Gladstone, J., Cruces, N., : Depthlab: Real-time 3d interaction with depth maps for mobile augmented reality. In: Proceedings of the 33rd Annual ACM Symposium on User Interface Software and Technology, pp. 829\u2013843 (2020)","DOI":"10.1145\/3379337.3415881"},{"issue":"4","key":"1560_CR5","doi-asserted-by":"publisher","first-page":"1738","DOI":"10.1109\/TPAMI.2020.3032602","volume":"44","author":"H Laga","year":"2020","unstructured":"Laga, H., Jospin, L.V., Boussaid, F., Bennamoun, M.: A survey on deep learning techniques for stereo-based depth estimation. IEEE Trans. Pattern Anal. Mach. Intell. 44(4), 1738\u20131764 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1560_CR6","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. Adv. Neural Inf. Process. Syst. 27, 2366\u20132374 (2014)"},{"key":"1560_CR7","doi-asserted-by":"crossref","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., Tao, D.: Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2002\u20132011 (2018)","DOI":"10.1109\/CVPR.2018.00214"},{"issue":"4","key":"1560_CR8","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen, L.-C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans. Pattern Anal. Mach. Intell. 40(4), 834\u2013848 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1560_CR9","doi-asserted-by":"publisher","first-page":"104520","DOI":"10.1016\/j.imavis.2022.104520","volume":"125","author":"B Wu","year":"2022","unstructured":"Wu, B., Wang, Y.: Rich global feature guided network for monocular depth estimation. Image Vis. Comput. 125, 104520 (2022)","journal-title":"Image Vis. Comput."},{"key":"1560_CR10","doi-asserted-by":"publisher","first-page":"107901","DOI":"10.1016\/j.patcog.2021.107901","volume":"115","author":"F Xue","year":"2021","unstructured":"Xue, F., Cao, J., Zhou, Y., Sheng, F., Wang, Y., Ming, A.: Boundary-induced and scene-aggregated network for monocular depth prediction. Pattern Recogn. 115, 107901 (2021)","journal-title":"Pattern Recogn."},{"issue":"11","key":"1560_CR11","doi-asserted-by":"publisher","first-page":"4381","DOI":"10.1109\/TCSVT.2021.3049869","volume":"31","author":"M Song","year":"2021","unstructured":"Song, M., Lim, S., Kim, W.: Monocular depth estimation using laplacian pyramid-based depth residuals. IEEE Trans. Circuits Syst. Video Technol. 31(11), 4381\u20134393 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1560_CR12","unstructured":"Kim, D., Ka, W., Ahn, P., Joo, D., Chun, S., Kim, J.: Global-local path networks for monocular depth estimation with vertical cutdepth. arXiv preprint arXiv:2201.07436 (2022)"},{"key":"1560_CR13","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12179\u201312188 (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"1560_CR14","doi-asserted-by":"crossref","unstructured":"Agarwal, A., Arora, C.: Depthformer: Multiscale vision transformer for monocular depth estimation with global local information fusion. In: 2022 IEEE International Conference on Image Processing (ICIP), pp. 3873\u20133877 (2022). IEEE","DOI":"10.1109\/ICIP46576.2022.9897187"},{"key":"1560_CR15","doi-asserted-by":"crossref","unstructured":"Li, B., Shen, C., Dai, Y., Van Den\u00a0Hengel, A., He, M.: Depth and surface normal estimation from monocular images using regression on deep features and hierarchical crfs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1119\u20131127 (2015)","DOI":"10.1109\/CVPR.2015.7298715"},{"key":"1560_CR16","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141L., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30, 6000\u20136010 (2017)"},{"issue":"1","key":"1560_CR17","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","volume":"45","author":"K Han","year":"2022","unstructured":"Han, K., Wang, Y., Chen, H., Chen, X., Guo, J., Liu, Z., Tang, Y., Xiao, A., Xu, C., Xu, Y.: A survey on vision transformer. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 87\u2013110 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"10s","key":"1560_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3505244","volume":"54","author":"S Khan","year":"2022","unstructured":"Khan, S., Naseer, M., Hayat, M., Zamir, S.W., Khan, F.S., Shah, M.: Transformers in vision: A survey. ACM Comput. Surv. 54(10s), 1\u201341 (2022)","journal-title":"ACM Comput. Surv."},{"key":"1560_CR19","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part I 16, pp. 213\u2013229 (2020). Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1560_CR20","unstructured":"Bhat, S.F., Alhashim, I., Wonka, P.: Adabins: Depth estimation using adaptive bins. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4009\u20134018 (2021)"},{"key":"1560_CR21","doi-asserted-by":"crossref","unstructured":"Li, J., Fang, F., Mei, K., Zhang, G.: Multi-scale residual network for image super-resolution. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 517\u2013532 (2018)","DOI":"10.1007\/978-3-030-01237-3_32"},{"key":"1560_CR22","unstructured":"Chen, L.-C., Papandreou, G., Schroff, F., Adam, H.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587 (2017)"},{"key":"1560_CR23","unstructured":"Lee, J.H., Han, M.-K., Ko, D.W., Suh, I.H.: From big to small: Multi-scale local planar guidance for monocular depth estimation. arXiv preprint arXiv:1907.10326 (2019)"},{"key":"1560_CR24","doi-asserted-by":"publisher","first-page":"103753","DOI":"10.1016\/j.jvcir.2023.103753","volume":"90","author":"Q Wang","year":"2023","unstructured":"Wang, Q., Piao, Y.: Depth estimation of supervised monocular images based on semantic segmentation. J. Vis. Commun. Image Represent. 90, 103753 (2023)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"1560_CR25","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L.: Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"1560_CR26","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? the kitti vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3354\u20133361 (2012). IEEE","DOI":"10.1109\/CVPR.2012.6248074"},{"issue":"7576","key":"1560_CR27","first-page":"746","volume":"5","author":"N Silberman","year":"2012","unstructured":"Silberman, N., Hoiem, D., Kohli, P., Fergus, R.: Indoor segmentation and support inference from rgbd images. ECCV 5(7576), 746\u2013760 (2012)","journal-title":"ECCV"},{"key":"1560_CR28","doi-asserted-by":"crossref","unstructured":"Garg, R., Bg, V.K., Carneiro, G., Reid, I.: Unsupervised cnn for single view depth estimation: Geometry to the rescue. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part VIII 14, pp. 740\u2013756 (2016). Springer","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"1560_CR29","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., et al.: Pytorch: An imperative style, high-performance deep learning library. Adv. Neural Inf. Process. Syst. 32, 8026\u20138037 (2019)"},{"key":"1560_CR30","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: Segformer: Simple and efficient design for semantic segmentation with transformers. Adv. Neural Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1560_CR31","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Khosla, A., Bernstein, M.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vision 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"1560_CR32","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"issue":"10","key":"1560_CR33","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu, F., Shen, C., Lin, G., Reid, I.: Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans. Pattern Anal. Mach. Intell. 38(10), 2024\u20132039 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1560_CR34","doi-asserted-by":"crossref","unstructured":"Yin, W., Liu, Y., Shen, C., Yan, Y.: Enforcing geometric constraints of virtual normal for depth prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5684\u20135693 (2019)","DOI":"10.1109\/ICCV.2019.00578"},{"issue":"7","key":"1560_CR35","doi-asserted-by":"publisher","first-page":"4841","DOI":"10.1109\/TCSVT.2021.3128505","volume":"32","author":"X Meng","year":"2021","unstructured":"Meng, X., Fan, C., Ming, Y., Yu, H.: Cornet: Context-based ordinal regression network for monocular depth estimation. IEEE Trans. Circuits Syst. Video Technol. 32(7), 4841\u20134853 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1560_CR36","doi-asserted-by":"crossref","unstructured":"Kuznietsov, Y., Stuckler, J., Leibe, B.: Semi-supervised deep learning for monocular depth map prediction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6647\u20136655 (2017)","DOI":"10.1109\/CVPR.2017.238"},{"key":"1560_CR37","doi-asserted-by":"crossref","unstructured":"Godard, C., Mac\u00a0Aodha, O., Brostow, G.J.: Unsupervised monocular depth estimation with left-right consistency. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 270\u2013279 (2017)","DOI":"10.1109\/CVPR.2017.699"},{"key":"1560_CR38","doi-asserted-by":"crossref","unstructured":"Eigen, D., Fergus, R.: Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2650\u20132658 (2015)","DOI":"10.1109\/ICCV.2015.304"},{"key":"1560_CR39","doi-asserted-by":"crossref","unstructured":"Hao, Z., Li, Y., You, S., Lu, F.: Detail preserving depth estimation from a single image using attention guided networks. In: 2018 International Conference on 3D Vision (3DV), pp. 304\u2013313 (2018). IEEE","DOI":"10.1109\/3DV.2018.00043"},{"key":"1560_CR40","doi-asserted-by":"crossref","unstructured":"Laina, I., Rupprecht, C., Belagiannis, V., Tombari, F., Navab, N.: Deeper depth prediction with fully convolutional residual networks. In: 2016 Fourth International Conference on 3D Vision (3DV), pp. 239\u2013248 (2016). IEEE","DOI":"10.1109\/3DV.2016.32"},{"key":"1560_CR41","doi-asserted-by":"crossref","unstructured":"Yuan, W., Gu, X., Dai, Z., Zhu, S., Tan, P.: New crfs: Neural window fully-connected crfs for monocular depth estimation. arXiv preprint arXiv:2203.01502 (2022)","DOI":"10.1109\/CVPR52688.2022.00389"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01560-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01560-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01560-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T07:31:23Z","timestamp":1732174283000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01560-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,6]]},"references-count":41,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["1560"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01560-0","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6,6]]},"assertion":[{"value":"17 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 April 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 May 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 June 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Author Han Chen declares that he has no Conflict of interest. Author Yongxiong Wang declares that he has no Conflict of interest. The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"All authors volunteer to participate in this study. All authors have understood the purpose and process of the study. All authors read and approved the final manuscript.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"All authors consent to the use of non-identifying information provided in this study for scholarly publications and presentations","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}}],"article-number":"78"}}