{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T07:50:45Z","timestamp":1781509845216,"version":"3.54.1"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585792","type":"print"},{"value":"9783030585808","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58580-8_38","type":"book-chapter","created":{"date-parts":[[2020,12,2]],"date-time":"2020-12-02T07:03:09Z","timestamp":1606892589000},"page":"644-660","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":258,"title":["RTM3D: Real-Time Monocular 3D\u00a0Detection from Object Keypoints for\u00a0Autonomous Driving"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0931-1932","authenticated-orcid":false,"given":"Peixuan","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7772-8652","authenticated-orcid":false,"given":"Huaici","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8432-711X","authenticated-orcid":false,"given":"Pengfei","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9000-3963","authenticated-orcid":false,"given":"Feidao","family":"Cao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,12,3]]},"reference":[{"key":"38_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"404","DOI":"10.1007\/11744023_32","volume-title":"Computer Vision \u2013 ECCV 2006","author":"H Bay","year":"2006","unstructured":"Bay, H., Tuytelaars, T., Van Gool, L.: SURF: speeded up robust features. In: Leonardis, A., Bischof, H., Pinz, A. (eds.) ECCV 2006. LNCS, vol. 3951, pp. 404\u2013417. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11744023_32"},{"key":"38_CR2","doi-asserted-by":"crossref","unstructured":"Behl, A., Hosseini Jafari, O., Karthik Mustikovela, S., Abu Alhaija, H., Rother, C., Geiger, A.: Bounding boxes, segmentations and object coordinates: how important is recognition for 3D scene flow estimation in autonomous driving scenarios? In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2574\u20132583 (2017)","DOI":"10.1109\/ICCV.2017.281"},{"key":"38_CR3","doi-asserted-by":"crossref","unstructured":"Brazil, G., Liu, X.: M3D-RPN: monocular 3D region proposal network for object detection. In: Proceedings of the IEEE International Conference on Computer Vision, Seoul, South Korea (2019)","DOI":"10.1109\/ICCV.2019.00938"},{"key":"38_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1007\/978-3-319-46493-0_22","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Z Cai","year":"2016","unstructured":"Cai, Z., Fan, Q., Feris, R.S., Vasconcelos, N.: A unified multi-scale deep convolutional neural network for fast object detection. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 354\u2013370. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_22"},{"key":"38_CR5","doi-asserted-by":"crossref","unstructured":"Chabot, F., Chaouch, M., Rabarisoa, J., Teuli\u00e8re, C., Chateau, T.: Deep MANTA: a coarse-to-fine many-task network for joint 2D and 3D vehicle analysis from monocular image. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2040\u20132049 (2017)","DOI":"10.1109\/CVPR.2017.198"},{"key":"38_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., Kundu, K., Zhang, Z., Ma, H., Fidler, S., Urtasun, R.: Monocular 3D object detection for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2147\u20132156 (2016)","DOI":"10.1109\/CVPR.2016.236"},{"issue":"5","key":"38_CR7","doi-asserted-by":"publisher","first-page":"1259","DOI":"10.1109\/TPAMI.2017.2706685","volume":"40","author":"X Chen","year":"2017","unstructured":"Chen, X., Kundu, K., Zhu, Y., Ma, H., Fidler, S., Urtasun, R.: 3D object proposals using stereo imagery for accurate object class detection. IEEE Trans. Pattern Anal. Mach. Intell. 40(5), 1259\u20131272 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR8","doi-asserted-by":"crossref","unstructured":"Chen, X., Ma, H., Wan, J., Li, B., Xia, T.: Multi-view 3D object detection network for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1907\u20131915 (2017)","DOI":"10.1109\/CVPR.2017.691"},{"key":"38_CR9","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3354\u20133361. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"38_CR10","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"38_CR11","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"38_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"38_CR13","doi-asserted-by":"crossref","unstructured":"He, T., Soatto, S.: Mono3D++: monocular 3D vehicle detection with two-scale 3D hypotheses and task priors. arXiv preprint arXiv:1901.03446 (2019)","DOI":"10.1609\/aaai.v33i01.33018409"},{"key":"38_CR14","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv. Learning (2014)"},{"key":"38_CR15","doi-asserted-by":"crossref","unstructured":"Kong, T., Sun, F., Liu, H., Jiang, Y., Shi, J.: FoveaBox: beyond anchor-based object detector. arXiv preprint arXiv:1904.03797 (2019)","DOI":"10.1109\/TIP.2020.3002345"},{"key":"38_CR16","doi-asserted-by":"crossref","unstructured":"Ku, J., Pon, A.D., Waslander, S.L.: Monocular 3D object detection leveraging accurate proposals and shape reconstruction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 11867\u201311876 (2019)","DOI":"10.1109\/CVPR.2019.01214"},{"key":"38_CR17","unstructured":"Kummerle, R., Grisetti, G., Strasdat, H., Konolige, K., Burgard, W.: G 2 o: a general framework for graph optimization, pp. 3607\u20133613 (2011)"},{"key":"38_CR18","doi-asserted-by":"crossref","unstructured":"Law, H., Deng, J.: CornerNet: detecting objects as paired keypoints. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 734\u2013750 (2018)","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"38_CR19","doi-asserted-by":"crossref","unstructured":"Li, B., Ouyang, W., Sheng, L., Zeng, X., Wang, X.: GS3D: an efficient 3D object detection framework for autonomous driving. In: The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2019","DOI":"10.1109\/CVPR.2019.00111"},{"key":"38_CR20","doi-asserted-by":"crossref","unstructured":"Li, P., Chen, X., Shen, S.: Stereo R-CNN based 3D object detection for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7644\u20137652 (2019)","DOI":"10.1109\/CVPR.2019.00783"},{"key":"38_CR21","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"38_CR22","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"38_CR23","doi-asserted-by":"crossref","unstructured":"Liu, L., Lu, J., Xu, C., Tian, Q., Zhou, J.: Deep fitting degree scoring network for monocular 3D object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1057\u20131066 (2019)","DOI":"10.1109\/CVPR.2019.00115"},{"issue":"2","key":"38_CR24","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. Int. J. Comput. Vision 60(2), 91\u2013110 (2004)","journal-title":"Int. J. Comput. Vision"},{"key":"38_CR25","doi-asserted-by":"crossref","unstructured":"Ma, X., Wang, Z., Li, H., Zhang, P., Ouyang, W., Fan, X.: Accurate monocular 3D object detection via color-embedded 3D reconstruction for autonomous driving. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 6851\u20136860 (2019)","DOI":"10.1109\/ICCV.2019.00695"},{"key":"38_CR26","doi-asserted-by":"crossref","unstructured":"Mousavian, A., Anguelov, D., Flynn, J., Kosecka, J.: 3D bounding box estimation using deep learning and geometry. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7074\u20137082 (2017)","DOI":"10.1109\/CVPR.2017.597"},{"key":"38_CR27","doi-asserted-by":"crossref","unstructured":"Murthy, J.K., Krishna, G.S., Chhaya, F., Krishna, K.M.: Reconstructing vehicles from a single image: shape priors for road scene understanding. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 724\u2013731. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989089"},{"key":"38_CR28","doi-asserted-by":"crossref","unstructured":"Naiden, A., Paunescu, V., Kim, G., Jeon, B., Leordeanu, M.: Shift R-CNN: deep monocular 3D object detection with closed-form geometric constraints. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 61\u201365 (2019)","DOI":"10.1109\/ICIP.2019.8803397"},{"key":"38_CR29","doi-asserted-by":"crossref","unstructured":"Qi, C.R., Liu, W., Wu, C., Su, H., Guibas, L.J.: Frustum PointNets for 3D object detection from RGB-D data. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 918\u2013927 (2018)","DOI":"10.1109\/CVPR.2018.00102"},{"key":"38_CR30","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: PointNet: deep learning on point sets for 3D classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 652\u2013660 (2017)"},{"key":"38_CR31","doi-asserted-by":"crossref","unstructured":"Qin, Z., Wang, J., Lu, Y.: MonoGRNet: a geometric reasoning network for monocular 3D object localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 8851\u20138858 (2019)","DOI":"10.1609\/aaai.v33i01.33018851"},{"key":"38_CR32","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"issue":"6","key":"38_CR33","first-page":"1281","volume":"40","author":"C Rubino","year":"2017","unstructured":"Rubino, C., Crocco, M., Del Bue, A.: 3D object localisation from multi-view image detections. IEEE Trans. Pattern Anal. Mach. Intell. 40(6), 1281\u20131294 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR34","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bulo, S.R., Porzi, L., L\u00f3pez-Antequera, M., Kontschieder, P.: Disentangling monocular 3D object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1991\u20131999 (2019)","DOI":"10.1109\/ICCV.2019.00208"},{"key":"38_CR35","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. Computer Science (2014)"},{"key":"38_CR36","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: FCOS: fully convolutional one-stage object detection. arXiv preprint arXiv:1904.01355 (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"38_CR37","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W.L., Garg, D., Hariharan, B., Campbell, M., Weinberger, K.Q.: Pseudo-lidar from visual depth estimation: bridging the gap in 3D object detection for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8445\u20138453 (2019)","DOI":"10.1109\/CVPR.2019.00864"},{"key":"38_CR38","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Choi, W., Lin, Y., Savarese, S.: Data-driven 3D voxel patterns for object category recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1903\u20131911 (2015)","DOI":"10.1109\/CVPR.2015.7298800"},{"key":"38_CR39","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Choi, W., Lin, Y., Savarese, S.: Subcategory-aware convolutional neural networks for object proposals and detection, pp. 924\u2013933 (2017)","DOI":"10.1109\/WACV.2017.108"},{"key":"38_CR40","doi-asserted-by":"crossref","unstructured":"Xu, B., Chen, Z.: Multi-level fusion based 3D object detection from monocular images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2345\u20132353 (2018)","DOI":"10.1109\/CVPR.2018.00249"},{"key":"38_CR41","doi-asserted-by":"crossref","unstructured":"Yang, B., Luo, W., Urtasun, R.: PIXOR: real-time 3D object detection from point clouds. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7652\u20137660 (2018)","DOI":"10.1109\/CVPR.2018.00798"},{"key":"38_CR42","doi-asserted-by":"publisher","first-page":"925","DOI":"10.1109\/TRO.2019.2909168","volume":"35","author":"S Yang","year":"2019","unstructured":"Yang, S., Scherer, S.: CubeSLAM: monocular 3-D object slam. IEEE Trans. Robot. 35, 925\u2013938 (2019)","journal-title":"IEEE Trans. Robot."},{"key":"38_CR43","doi-asserted-by":"crossref","unstructured":"Yu, F., Wang, D., Shelhamer, E., Darrell, T.: Deep layer aggregation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2403\u20132412 (2018)","DOI":"10.1109\/CVPR.2018.00255"},{"key":"38_CR44","doi-asserted-by":"crossref","unstructured":"Zeeshan Zia, M., Stark, M., Schindler, K.: Are cars just 3D boxes?-Jointly estimating the 3D shape of multiple objects. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3678\u20133685 (2014)","DOI":"10.1109\/CVPR.2014.470"},{"key":"38_CR45","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points. arXiv preprint arXiv:1904.07850 (2019)"},{"key":"38_CR46","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tuzel, O.: VoxelNet: end-to-end learning for point cloud based 3D object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4490\u20134499 (2018)","DOI":"10.1109\/CVPR.2018.00472"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58580-8_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:14:52Z","timestamp":1733098492000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58580-8_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585792","9783030585808"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58580-8_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"3 December 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}