{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T07:03:22Z","timestamp":1773903802805,"version":"3.50.1"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585259","type":"print"},{"value":"9783030585266","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58526-6_6","type":"book-chapter","created":{"date-parts":[[2020,10,6]],"date-time":"2020-10-06T21:03:07Z","timestamp":1602018187000},"page":"91-107","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":41,"title":["Distance-Normalized Unified Representation for Monocular 3D Object Detection"],"prefix":"10.1007","author":[{"given":"Xuepeng","family":"Shi","sequence":"first","affiliation":[]},{"given":"Zhixiang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Tae-Kyun","family":"Kim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,7]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Brazil, G., Liu, X.: M3D-RPN: monocular 3D region proposal network for object detection. In: ICCV, pp. 9287\u20139296 (2019)","DOI":"10.1109\/ICCV.2019.00938"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Chen, X., Kundu, K., Zhang, Z., Ma, H., Fidler, S., Urtasun, R.: Monocular 3D object detection for autonomous driving. In: CVPR, pp. 2147\u20132156 (2016)","DOI":"10.1109\/CVPR.2016.236"},{"key":"6_CR3","unstructured":"Chen, X., et al.: 3D object proposals for accurate object class detection. In: NeurIPS, pp. 424\u2013432 (2015)"},{"key":"6_CR4","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: ICCV, pp. 764\u2013773 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Doll\u00e1r, P., Welinder, P., Perona, P.: Cascaded pose regression. In: CVPR, pp. 1078\u20131085 (2010)","DOI":"10.1109\/CVPR.2010.5540094"},{"issue":"1","key":"6_CR6","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., et al.: The pascal visual object classes challenge: a retrospective. Int. J. Comput. Vision 111(1), 98\u2013136 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Feng, Z., Kittler, J., Awais, M., Huber, P., Wu, X.: Wing loss for robust facial landmark localisation with convolutional neural networks. In: CVPR, pp. 2235\u20132245 (2018)","DOI":"10.1109\/CVPR.2018.00238"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., Tao, D.: Deep ordinal regression network for monocular depth estimation. In: CVPR, pp. 2002\u20132011 (2018)","DOI":"10.1109\/CVPR.2018.00214"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The KITTI vision benchmark suite. In: CVPR, pp. 3354\u20133361 (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Girshick, R.B.: Fast R-CNN. In: ICCV, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"He, Y., Zhu, C., Wang, J., Savvides, M., Zhang, X.: Bounding box regression with uncertainty for accurate object detection. In: CVPR, pp. 2888\u20132897 (2019)","DOI":"10.1109\/CVPR.2019.00300"},{"key":"6_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"816","DOI":"10.1007\/978-3-030-01264-9_48","volume-title":"Computer Vision \u2013 ECCV 2018","author":"B Jiang","year":"2018","unstructured":"Jiang, B., Luo, R., Mao, J., Xiao, T., Jiang, Y.: Acquisition of localization confidence for accurate object detection. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018. LNCS, vol. 11218, pp. 816\u2013832. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_48"},{"key":"6_CR14","unstructured":"J\u00f6rgensen, E., Zach, C., Kahl, F.: Monocular 3d object detection and box fitting trained end-to-end using intersection-over-union loss. CoRR abs\/1906.08070 (2019)"},{"key":"6_CR15","unstructured":"Kendall, A., Gal, Y., Cipolla, R.: Multi-task learning using uncertainty to weigh losses for scene geometry and semantics. In: CVPR, pp. 7482\u20137491 (2018)"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Ku, J., Pon, A.D., Waslander, S.L.: Monocular 3D object detection leveraging accurate proposals and shape reconstruction. In: CVPR, pp. 11867\u201311876 (2019)","DOI":"10.1109\/CVPR.2019.01214"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Lang, A.H., Vora, S., Caesar, H., Zhou, L., Yang, J., Beijbom, O.: PointPillars: fast encoders for object detection from point clouds. In: CVPR, pp. 12697\u201312705 (2019)","DOI":"10.1109\/CVPR.2019.01298"},{"issue":"7553","key":"6_CR18","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.E.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Li, B., Ouyang, W., Sheng, L., Zeng, X., Wang, X.: GS3D: an efficient 3D object detection framework for autonomous driving. In: CVPR, pp. 1019\u20131028 (2019)","DOI":"10.1109\/CVPR.2019.00111"},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Li, H., Lin, Z., Shen, X., Brandt, J., Hua, G.: A convolutional neural network cascade for face detection. In: CVPR, pp. 5325\u20135334 (2015)","DOI":"10.1109\/CVPR.2015.7299170"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Li, Y., Chen, Y., Wang, N., Zhang, Z.: Scale-aware trident networks for object detection. In: CVPR, pp. 6054\u20136063 (2019)","DOI":"10.1109\/ICCV.2019.00615"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Lin, T., Doll\u00e1r, P., Girshick, R.B., He, K., Hariharan, B., Belongie, S.J.: Feature pyramid networks for object detection. In: CVPR, pp. 936\u2013944 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Lin, T., Goyal, P., Girshick, R.B., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: ICCV, pp. 2999\u20133007 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"6_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"issue":"2","key":"6_CR25","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s11263-019-01247-4","volume":"128","author":"L Liu","year":"2020","unstructured":"Liu, L., et al.: Deep learning for generic object detection: a survey. Int. J. Comput. Vision 128(2), 261\u2013318 (2020)","journal-title":"Int. J. Comput. Vision"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Liu, L., Lu, J., Xu, C., Tian, Q., Zhou, J.: Deep fitting degree scoring network for monocular 3D object detection. In: CVPR, pp. 1057\u20131066 (2019)","DOI":"10.1109\/CVPR.2019.00115"},{"key":"6_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot MultiBox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"Lv, J., Shao, X., Xing, J., Cheng, C., Zhou, X.: A deep regression architecture with two-stage re-initialization for high performance facial landmark detection. In: CVPR, pp. 3691\u20133700 (2017)","DOI":"10.1109\/CVPR.2017.393"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Ma, X., Wang, Z., Li, H., Ouyang, W., Zhang, P.: Accurate monocular 3D object detection via color-embedded 3D reconstruction for autonomous driving. In: ICCV, pp. 6851\u20136860 (2019)","DOI":"10.1109\/ICCV.2019.00695"},{"key":"6_CR30","doi-asserted-by":"crossref","unstructured":"Manhardt, F., Kehl, W., Gaidon, A.: ROI-10D: monocular lifting of 2D detection to 6d pose and metric shape. In: CVPR, pp. 2069\u20132078 (2019)","DOI":"10.1109\/CVPR.2019.00217"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Mousavian, A., Anguelov, D., Flynn, J., Kosecka, J.: 3D bounding box estimation using deep learning and geometry. In: CVPR, pp. 5632\u20135640 (2017)","DOI":"10.1109\/CVPR.2017.597"},{"key":"6_CR32","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: NeurIPS, pp. 8024\u20138035 (2019)"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Qin, Z., Wang, J., Lu, Y.: MonoGRNet: a geometric reasoning network for monocular 3D object localization. In: AAAI, pp. 8851\u20138858 (2019)","DOI":"10.1609\/aaai.v33i01.33018851"},{"key":"6_CR34","unstructured":"Ren, S., He, K., Girshick, R.B., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NeurIPS, pp. 91\u201399 (2015)"},{"key":"6_CR35","unstructured":"Roddick, T., Kendall, A., Cipolla, R.: Orthographic feature transform for monocular 3D object detection. In: British Machine Vision Conference (2019)"},{"key":"6_CR36","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., Li, H.: PointRCNN: 3D object proposal generation and detection from point cloud. In: CVPR, pp. 770\u2013779 (2019)","DOI":"10.1109\/CVPR.2019.00086"},{"key":"6_CR37","doi-asserted-by":"crossref","unstructured":"Shi, X., Shan, S., Kan, M., Wu, S., Chen, X.: Real-time rotation-invariant face detection with progressive calibration networks. In: CVPR, pp. 2295\u20132303 (2018)","DOI":"10.1109\/CVPR.2018.00244"},{"key":"6_CR38","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bul\u00f2, S.R., Porzi, L., L\u00f3pez-Antequera, M., Kontschieder, P.: Disentangling monocular 3D object detection. In: ICCV, pp. 1991\u20131999 (2019)","DOI":"10.1109\/ICCV.2019.00208"},{"key":"6_CR39","doi-asserted-by":"crossref","unstructured":"Singh, B., Davis, L.S.: An analysis of scale invariance in object detection SNIP. In: CVPR, pp. 3578\u20133587 (2018)","DOI":"10.1109\/CVPR.2018.00377"},{"key":"6_CR40","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: FCOS: fully convolutional one-stage object detection. In: CVPR, pp. 9627\u20139636 (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"issue":"2","key":"6_CR41","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1023\/B:VISI.0000013087.49260.fb","volume":"57","author":"PA Viola","year":"2004","unstructured":"Viola, P.A., Jones, M.J.: Robust real-time face detection. Int. J. Comput. Vision 57(2), 137\u2013154 (2004)","journal-title":"Int. J. Comput. Vision"},{"key":"6_CR42","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W., Garg, D., Hariharan, B., Campbell, M.E., Weinberger, K.Q.: Pseudo-lidar from visual depth estimation: bridging the gap in 3D object detection for autonomous driving. In: CVPR, pp. 8445\u20138453 (2019)","DOI":"10.1109\/CVPR.2019.00864"},{"key":"6_CR43","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Choi, W., Lin, Y., Savarese, S.: Subcategory-aware convolutional neural networks for object proposals and detection. In: WACV, pp. 924\u2013933 (2017)","DOI":"10.1109\/WACV.2017.108"},{"key":"6_CR44","doi-asserted-by":"crossref","unstructured":"Xu, B., Chen, Z.: Multi-level fusion based 3D object detection from monocular images. In: CVPR, pp. 2345\u20132353 (2018)","DOI":"10.1109\/CVPR.2018.00249"},{"key":"6_CR45","doi-asserted-by":"crossref","unstructured":"Yang, B., Luo, W., Urtasun, R.: PIXOR: real-time 3d object detection from point clouds. In: CVPR, pp. 7652\u20137660 (2018)","DOI":"10.1109\/CVPR.2018.00798"},{"key":"6_CR46","doi-asserted-by":"crossref","unstructured":"Yu, J., Jiang, Y., Wang, Z., Cao, Z., Huang, T.S.: Unitbox: an advanced object detection network. In: ACM MM, pp. 516\u2013520. ACM (2016)","DOI":"10.1145\/2964284.2967274"},{"key":"6_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-319-10605-2_1","volume-title":"Computer Vision \u2013 ECCV 2014","author":"J Zhang","year":"2014","unstructured":"Zhang, J., Shan, S., Kan, M., Chen, X.: Coarse-to-fine auto-encoder networks (CFAN) for real-time face alignment. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8690, pp. 1\u201316. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10605-2_1"},{"issue":"10","key":"6_CR48","doi-asserted-by":"publisher","first-page":"1499","DOI":"10.1109\/LSP.2016.2603342","volume":"23","author":"K Zhang","year":"2016","unstructured":"Zhang, K., Zhang, Z., Li, Z., Qiao, Y.: Joint face detection and alignment using multitask cascaded convolutional networks. IEEE Signal Process. Lett. 23(10), 1499\u20131503 (2016)","journal-title":"IEEE Signal Process. Lett."},{"key":"6_CR49","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tuzel, O.: VoxelNet: end-to-end learning for point cloud based 3D object detection. In: CVPR, pp. 4490\u20134499 (2018)","DOI":"10.1109\/CVPR.2018.00472"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58526-6_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T00:08:22Z","timestamp":1728173302000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58526-6_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585259","9783030585266"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58526-6_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"7 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}