{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T04:07:05Z","timestamp":1773547625659,"version":"3.50.1"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030695248","type":"print"},{"value":"9783030695255","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-69525-5_21","type":"book-chapter","created":{"date-parts":[[2021,2,26]],"date-time":"2021-02-26T16:21:16Z","timestamp":1614356476000},"page":"349-364","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Dynamic Depth Fusion and Transformation for Monocular 3D Object Detection"],"prefix":"10.1007","author":[{"given":"Erli","family":"Ouyang","sequence":"first","affiliation":[]},{"given":"Li","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Mohan","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Anurag","family":"Arnab","sequence":"additional","affiliation":[]},{"given":"Yanwei","family":"Fu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,27]]},"reference":[{"key":"21_CR1","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: PointNet: deep learning on point sets for 3D classification and segmentation. In: CVPR (2017)"},{"key":"21_CR2","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: PointNet++: deep hierarchical feature learning on point sets in a metric space. In: NeurIPS (2017)"},{"key":"21_CR3","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tuzel, O.: VoxelNet: end-to-end learning for point cloud based 3D object detection. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00472"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Qi, C.R., Liu, W., Wu, C., Su, H., Guibas, L.J.: Frustum PointNets for 3D object detection from RGB-D data. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00102"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., Li, H.: PointRCNN: 3D object proposal generation and detection from point cloud. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00086"},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., Ma, H., Wan, J., Li, B., Xia, T.: Multi-view 3D object detection network for autonomous driving. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.691"},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Liang, M., Yang, B., Wang, S., Urtasun, R.: Deep continuous fusion for multi-sensor 3D object detection. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01270-0_39"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Yang, B., Luo, W., Urtasun, R.: Pixor: real-time 3D object detection from point clouds. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00798"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Chen, X., Kundu, K., Zhang, Z., Ma, H., Fidler, S., Urtasun, R.: Monocular 3D object detection for autonomous driving. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.236"},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Mousavian, A., Anguelov, D., Flynn, J., Kosecka, J.: 3D bounding box estimation using deep learning and geometry. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.597"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Xu, B., Chen, Z.: Multi-level fusion based 3D object detection from monocular images. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00249"},{"key":"21_CR12","unstructured":"Zhang, L., Li, X., Arnab, A., Yang, K., Tong, Y., Torr, P.H.: Dual graph convolutional network for semantic segmentation. In: BMVC (2019)"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Zhang, L., Xu, D., Arnab, A., Torr, P.H.: Dynamic graph message passing networks. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00378"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhang, L., Cheng, M.M., Feng, J.: Strip pooling: rethinking spatial pooling for scene parsing. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00406"},{"key":"21_CR15","unstructured":"Li, X., Zhang, L., You, A., Yang, M., Yang, K., Tong, Y.: Global aggregation then local distribution in fully convolutional networks. In: BMVC (2019)"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: Improving semantic segmentation via decoupled body and edge supervision. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58520-4_26"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"Wang, Q., Zhang, L., Bertinetto, L., Hu, W., Torr, P.H.: Fast online object tracking and segmentation: a unifying approach. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00142"},{"key":"21_CR18","unstructured":"Zhu, F., Zhang, L., Fu, Y., Guo, G., Xie, W.: Self-supervised video object segmentation. arXiv preprint (2020)"},{"key":"21_CR19","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W.L., Garg, D., Hariharan, B., Campbell, M., Weinberger, K.: Pseudo-LiDAR from visual depth estimation: bridging the gap in 3D object detection for autonomous driving. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00864"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Ma, X., Wang, Z., Li, H., Zhang, P., Ouyang, W., Fan, X.: Accurate monocular 3D object detection via color-embedded 3D reconstruction for autonomous driving. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00695"},{"key":"21_CR21","unstructured":"Chen, X., et al.: 3D object proposals for accurate object class detection. In: NeurIPS (2015)"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"Li, P., Chen, X., Shen, S.: Stereo R-CNN based 3D object detection for autonomous driving. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00783"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., Tao, D.: Deep ordinal regression network for monocular depth estimation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00214"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? The kitti vision benchmark suite. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"21_CR25","doi-asserted-by":"crossref","unstructured":"Li, B., Ouyang, W., Sheng, L., Zeng, X., Wang, X.: GS3D: an efficient 3D object detection framework for autonomous driving. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00111"},{"key":"21_CR26","doi-asserted-by":"crossref","unstructured":"Brazil, G., Liu, X.: M3D-RPN: monocular 3D region proposal network for object detection. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00938"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Manhardt, F., Kehl, W., Gaidon, A.: ROI-10D: monocular lifting of 2D detection to 6D pose and metric shape. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00217"},{"key":"21_CR28","doi-asserted-by":"crossref","unstructured":"Huang, X., Belongie, S.: Arbitrary style transfer in real-time with adaptive instance normalization. In: CVPR (2017)","DOI":"10.1109\/ICCV.2017.167"},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M.Y., Wang, T.C., Zhu, J.Y.: Semantic image synthesis with spatially-adaptive normalization. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00244"},{"key":"21_CR30","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: CVPR (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"21_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/978-3-319-46484-8_29","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Newell","year":"2016","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 483\u2013499. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_29"},{"key":"21_CR32","doi-asserted-by":"crossref","unstructured":"Wei, S.E., Ramakrishna, V., Kanade, T., Sheikh, Y.: Convolutional pose machines. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.511"},{"key":"21_CR33","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points. In: arXiv preprint (2019)"},{"key":"21_CR34","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: CVPR (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"21_CR35","doi-asserted-by":"crossref","unstructured":"Yu, F., Wang, D., Shelhamer, E., Darrell, T.: Deep layer aggregation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00255"},{"key":"21_CR36","doi-asserted-by":"crossref","unstructured":"Qin, Z., Wang, J., Lu, Y.: MonoGRNet: a geometric reasoning network for monocular 3D object localization. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33018851"},{"key":"21_CR37","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bulo, S.R., Porzi, L., L\u00f3pez-Antequera, M., Kontschieder, P.: Disentangling monocular 3D object detection. In: CVPR (2019)","DOI":"10.1109\/ICCV.2019.00208"},{"key":"21_CR38","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1007\/978-3-030-58592-1_9","volume-title":"Computer Vision \u2013 ECCV 2020","author":"G Brazil","year":"2020","unstructured":"Brazil, G., Pons-Moll, G., Liu, X., Schiele, B.: Kinematic 3D object detection in\u00a0monocular video. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12368, pp. 135\u2013152. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58592-1_9"},{"key":"21_CR39","doi-asserted-by":"crossref","unstructured":"Liu, L., Lu, J., Xu, C., Tian, Q., Zhou, J.: Deep fitting degree scoring network for monocular 3D object detection. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00115"},{"key":"21_CR40","doi-asserted-by":"crossref","unstructured":"Ku, J., Pon, A.D., Waslander, S.L.: Monocular 3D object detection leveraging accurate proposals and shape reconstruction. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01214"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2020"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-69525-5_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,2,26]],"date-time":"2021-02-26T16:49:56Z","timestamp":1614358196000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-69525-5_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030695248","9783030695255"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-69525-5_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"27 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 December 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/accv2020.kyoto\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"768","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"254","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}