{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T16:07:36Z","timestamp":1775146056284,"version":"3.50.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,6,21]],"date-time":"2023-06-21T00:00:00Z","timestamp":1687305600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,21]],"date-time":"2023-06-21T00:00:00Z","timestamp":1687305600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Donghai Laboratory","award":["DH-2022ZY0002"],"award-info":[{"award-number":["DH-2022ZY0002"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1007\/s00371-023-02884-0","type":"journal-article","created":{"date-parts":[[2023,6,21]],"date-time":"2023-06-21T08:02:55Z","timestamp":1687334575000},"page":"1757-1773","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Leveraging front and side cues for occlusion handling in monocular 3D object detection"],"prefix":"10.1007","volume":"40","author":[{"given":"Yuying","family":"Song","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zecheng","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingxuan","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3274-6806","authenticated-orcid":false,"given":"Chunyi","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiwei","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,6,21]]},"reference":[{"key":"2884_CR1","doi-asserted-by":"publisher","first-page":"3765","DOI":"10.1007\/s00371-021-02217-z","volume":"38","author":"H Zhao","year":"2022","unstructured":"Zhao, H., Yang, D., Yu, J.: 3D target detection using dual domain attention and SIFT operator in indoor scenes. Vis. Comput. 38, 3765\u20133774 (2022)","journal-title":"Vis. Comput."},{"key":"2884_CR2","doi-asserted-by":"crossref","unstructured":"Chen, Q., Sun, L., Wang, Z., Jia, K., Yuille, A.: Object as hotspots: an anchor-free 3D object detection approach via firing of hotspots. In: European Conference on Computer Vision. Springer, Berlin (2020)","DOI":"10.1007\/978-3-030-58589-1_5"},{"key":"2884_CR3","doi-asserted-by":"crossref","unstructured":"Shi, S., Guo, C., Jiang, L., Wang, Z., Shi, J., Wang, X., Li, H.: PV-RCNN: point-voxel feature set abstraction for 3D object detection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Seattle, WA, USA, pp. 10526\u201310535 (2020)","DOI":"10.1109\/CVPR42600.2020.01054"},{"key":"2884_CR4","doi-asserted-by":"crossref","unstructured":"Wu, P., Gu, L., Yan, X., Xie, H., Wang, F.L., Cheng, G., Wei, M.: PV-RCNN++: semantical point-voxel feature interaction for 3D object detection. Vis. Comput. 1\u201316 (2022)","DOI":"10.1007\/s00371-022-02672-2"},{"key":"2884_CR5","doi-asserted-by":"crossref","unstructured":"Ji, C., Liu, G., Zhao, D.: Stereo 3D object detection via instance depth prior guidance and adaptive spatial feature aggregation. Vis. Comput. 1\u201312 (2022)","DOI":"10.1007\/s00371-022-02607-x"},{"key":"2884_CR6","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1007\/s00371-018-1472-3","volume":"35","author":"R Wang","year":"2019","unstructured":"Wang, R., Liang, Y., Xu, J.W., He, Z.H.: Cascading classifier with discriminative multi-features for a specific 3D object real-time detection. Vis. Comput. 35, 399\u2013414 (2019)","journal-title":"Vis. Comput."},{"key":"2884_CR7","doi-asserted-by":"crossref","unstructured":"Ma, X., Liu, S., Xia, Z., Zhang, H., Zeng, X., Ouyang, W.: Rethinking pseudo-lidar representation. In: European Conference on Computer Vision. Springer, pp. 311\u2013327 (2020)","DOI":"10.1007\/978-3-030-58601-0_19"},{"key":"2884_CR8","doi-asserted-by":"crossref","unstructured":"Manhardt, F., Kehl, W., Gaidon, A.: ROI-10D: Monocular lifting of 2D detection to 6D pose and metric shape. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Long Beach, CA, USA, pp. 2064\u20132073 (2019)","DOI":"10.1109\/CVPR.2019.00217"},{"key":"2884_CR9","doi-asserted-by":"crossref","unstructured":"Cheng, T., Sun, L., Zhang, J., Hou, D., Shi, Q., Chen, J.: Based on real and virtual datasets adaptive joint training in multi-modal networks with applications in monocular 3D target detection. Vis. Comput. 1\u201311 (2022)","DOI":"10.1007\/s00371-022-02734-5"},{"key":"2884_CR10","doi-asserted-by":"crossref","unstructured":"Brazil, G., Liu, X.: M3D-RPN: monocular 3D region proposal network for object detection. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV). IEEE, Seoul, Korea (South), pp. 9286\u20139295 (2019)","DOI":"10.1109\/ICCV.2019.00938"},{"key":"2884_CR11","unstructured":"Wang, T., Xinge, Z., Pang, J., Lin, D.: Probabilistic and geometric depth: detecting objects in perspective. In: Conference on Robot Learning. PMLR, pp. 1475\u20131485 (2022)"},{"key":"2884_CR12","doi-asserted-by":"crossref","unstructured":"Xu, B., Chen, Z.: Multi-level fusion based 3D object detection from monocular \u0131mages. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE, Salt Lake City, UT, USA, pp. 2345\u20132353 (2018)","DOI":"10.1109\/CVPR.2018.00249"},{"key":"2884_CR13","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W.-L., Garg, D., Hariharan, B., Campbell, M., Weinberger, K.Q.: Pseudo-LiDAR from visual depth estimation: bridging the gap in 3D object detection for autonomous driving. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Long Beach, CA, USA, pp. 8437\u20138445 (2019)","DOI":"10.1109\/CVPR.2019.00864"},{"key":"2884_CR14","unstructured":"Roddick, T., Kendall, A., Cipolla, R.: Orthographic Feature Transform for Monocular 3D Object Detection (2018)"},{"key":"2884_CR15","doi-asserted-by":"crossref","unstructured":"Ouyang, E., Zhang, L., Chen, M., Arnab, A., Fu, Y.: Dynamic depth fusion and transformation for monocular 3d object detection. In: Proceedings of the Asian Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-69525-5_21"},{"key":"2884_CR16","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., Wei, Y.: Deformable Convolutional Networks. p 10 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"2884_CR17","doi-asserted-by":"crossref","unstructured":"Chen, Y., Tai, L., Sun, K., Li, M.: MonoPair: monocular 3D object detection using pairwise spatial relationships. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Seattle, WA, USA, pp. 12090\u201312099 (2020)","DOI":"10.1109\/CVPR42600.2020.01211"},{"key":"2884_CR18","doi-asserted-by":"crossref","unstructured":"Wang, H., Zhu, Y., Green, B., Adam, H., Yuille, A., Chen, L.-C.: Axial-deeplab: Stand-alone axial-attention for panoptic segmentation. In: European Conference on Computer Vision. Springer, pp. 108\u2013126 (2020)","DOI":"10.1007\/978-3-030-58548-8_7"},{"key":"2884_CR19","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger, A., Lenz, P., Stiller, C., Urtasun, R.: Vision meets robotics: the KITTI dataset. Int. J. Robot. Res. 32, 1231\u20131237 (2013). https:\/\/doi.org\/10.1177\/0278364913491297","journal-title":"Int. J. Robot. Res."},{"key":"2884_CR20","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39, 1137\u20131149 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2884_CR21","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Dollar, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"2884_CR22","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition. IEEE, Columbus, OH, USA, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"2884_CR23","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE \u0130nternational Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"2884_CR24","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., Berg, A.C.: SSD: single shot multibox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) Computer Vision \u2013 ECCV 2016, pp. 21\u201337. Springer International Publishing, Cham (2016)"},{"key":"2884_CR25","unstructured":"Redmon, J., Farhadi, A.: YOLOv3: An Incremental Improvement (2018) arXiv:180402767 [cs]"},{"key":"2884_CR26","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1007\/s00371-019-01787-3","volume":"37","author":"L Wei","year":"2021","unstructured":"Wei, L., Cui, W., Hu, Z., Sun, H., Hou, S.: A single-shot multi-level feature reused neural network for object detection. Vis. Comput. 37, 133\u2013142 (2021)","journal-title":"Vis. Comput."},{"key":"2884_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, T., Cao, Y., Zhang, L., Li, X.: Efficient feature fusion network based on center and scale prediction for pedestrian detection. Vis. Comput. 1\u20138 (2022)","DOI":"10.1007\/s00371-022-02528-9"},{"key":"2884_CR28","doi-asserted-by":"crossref","unstructured":"Law, H., Deng, J.: CornerNet: detecting objects as paired keypoints. In: Proceedings of the European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"2884_CR29","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: FCOS: fully convolutional one-stage object detection. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV). IEEE, Seoul, Korea (South), pp. 9626\u20139635 (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"2884_CR30","doi-asserted-by":"publisher","first-page":"2223","DOI":"10.1007\/s00371-021-02280-6","volume":"38","author":"M Saeidi","year":"2022","unstructured":"Saeidi, M., Arabsorkhi, A.: A novel backbone architecture for pedestrian detection based on the human visual system. Vis. Comput. 38, 2223\u20132237 (2022)","journal-title":"Vis. Comput."},{"key":"2884_CR31","doi-asserted-by":"crossref","unstructured":"Mousavian, A., Anguelov, D., Flynn, J., Kosecka, J.: 3D Bounding Box Estimation Using Deep Learning and Geometry. (2017) arXiv:1612.00496 [cs]","DOI":"10.1109\/CVPR.2017.597"},{"key":"2884_CR32","doi-asserted-by":"crossref","unstructured":"Barabanau, I., Artemov, A., Burnaev, E., Murashkin, V.: Monocular 3D Object Detection via Geometric Reasoning on Keypoints. (2019) arXiv:190505618 [cs]","DOI":"10.5220\/0009102506520659"},{"key":"2884_CR33","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhu, X., Pang, J., Lin, D.: Fcos3d: fully convolutional one-stage monocular 3d object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 913\u2013922 (2021)","DOI":"10.1109\/ICCVW54120.2021.00107"},{"key":"2884_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lu, J., Zhou, J.: Objects are different: flexible monocular 3D object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3289\u20133298 (2021)","DOI":"10.1109\/CVPR46437.2021.00330"},{"key":"2884_CR35","doi-asserted-by":"crossref","unstructured":"Li, P., Zhao, H., Liu, P., Cao, F.: Rtm3d: real-time monocular 3d detection from object keypoints for autonomous driving. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part III 16. Springer, pp. 644\u2013660 (2020)","DOI":"10.1007\/978-3-030-58580-8_38"},{"key":"2884_CR36","doi-asserted-by":"crossref","unstructured":"Peng, S., Liu, Y., Huang, Q., Zhou, X., Bao, H.: Pvnet: Pixel-wise voting network for 6dof pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4561\u20134570 (2019)","DOI":"10.1109\/CVPR.2019.00469"},{"key":"2884_CR37","doi-asserted-by":"publisher","first-page":"4050","DOI":"10.1109\/TIP.2022.3180210","volume":"31","author":"H Liu","year":"2022","unstructured":"Liu, H., Liu, H., Wang, Y., Sun, F., Huang, W.: Fine-grained multilevel fusion for anti-occlusion monocular 3d object detection. IEEE Trans. Image Process. 31, 4050\u20134061 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"2884_CR38","unstructured":"Kendall, A., Gal, Y.: What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision? (2017) arXiv:170304977 [cs]"},{"key":"2884_CR39","doi-asserted-by":"crossref","unstructured":"Lu, Y., Ma, X., Yang, L., Zhang, T., Liu, Y., Chu, Q., Yan, J., Ouyang, W.: Geometry Uncertainty Projection Network for Monocular 3D Object Detection (2021) arXiv:2107.13774 [cs]","DOI":"10.1109\/ICCV48922.2021.00310"},{"key":"2884_CR40","doi-asserted-by":"crossref","unstructured":"Liu, C., Gu, J., Kim, K., Narasimhan, S.G., Kautz. J.: Neural RGB\u00aeD sensing: depth and uncertainty from a video camera. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Long Beach, CA, USA, pp. 10978\u201310987 (2019)","DOI":"10.1109\/CVPR.2019.01124"},{"key":"2884_CR41","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2884_CR42","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Honolulu, HI, pp. 936\u2013944 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"2884_CR43","doi-asserted-by":"crossref","unstructured":"Xie Z, Song Y, Wu J, Li Z, Song C, Xu Z.: MDS-net: a multi-scale depth stratification based monocular 3D object detection algorithm (2022)","DOI":"10.2139\/ssrn.4118357"},{"key":"2884_CR44","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE \u0130nternational Conference on Computer Vision, pp. 2980\u20132988(2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"2884_CR45","unstructured":"Chen, X., Kundu, K., Zhu, Y., Berneshawi, A.G., Ma, H., Fidler, S.: Urtasun R 3D Object Proposals for Accurate Object Class Detection. p. 9 (2015)"},{"key":"2884_CR46","unstructured":"Chen, K., Wang, J., Pang, J., Cao, Y., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Xu, J. et al.: MMDetection: Open mmlab Detection Toolbox and Benchmark (2019) arXiv preprint arXiv:1906.07155"},{"key":"2884_CR47","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Commun. ACM 60, 84\u201390 (2017)","journal-title":"Commun. ACM"},{"key":"2884_CR48","doi-asserted-by":"crossref","unstructured":"Ma, X., Zhang, Y., Xu, D., Zhou, D., Yi, S., Li, H., Ouyang, W.: Delving into localization errors for monocular 3D object detection. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Nashville, TN, USA, pp. 4719\u20134728 (2021)","DOI":"10.1109\/CVPR46437.2021.00469"},{"key":"2884_CR49","doi-asserted-by":"crossref","unstructured":"Kumar, A., Brazil, G., Liu, X.: GrooMeD-NMS: grouped mathematically differentiable nms for monocular 3D object detection. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Nashville, TN, USA, pp. 8969\u20138979 (2021)","DOI":"10.1109\/CVPR46437.2021.00886"},{"key":"2884_CR50","doi-asserted-by":"crossref","unstructured":"Luo, S., Dai, H., Shao, L., Ding, Y.: M3dssd: monocular 3d single stage object detector. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6145\u20136154 (2021)","DOI":"10.1109\/CVPR46437.2021.00608"},{"key":"2884_CR51","doi-asserted-by":"crossref","unstructured":"Shi, X., Ye, Q., Chen, X., Chen, C., Chen, Z., Kim, T.-K.: Geometry-based distance decomposition for monocular 3D object detection. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). IEEE, Montreal, QC, Canada, pp. 15152\u201315161 (2021)","DOI":"10.1109\/ICCV48922.2021.01489"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-02884-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-023-02884-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-02884-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T18:22:30Z","timestamp":1729621350000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-023-02884-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,21]]},"references-count":51,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,3]]}},"alternative-id":["2884"],"URL":"https:\/\/doi.org\/10.1007\/s00371-023-02884-0","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6,21]]},"assertion":[{"value":"19 April 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 June 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Research involving human participants and\/or animals"}}]}}