{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T17:29:57Z","timestamp":1773682197746,"version":"3.50.1"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030695248","type":"print"},{"value":"9783030695255","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-69525-5_27","type":"book-chapter","created":{"date-parts":[[2021,2,26]],"date-time":"2021-02-26T16:21:16Z","timestamp":1614356476000},"page":"453-468","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["3D Object Detection from Consecutive Monocular Images"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0706-2808","authenticated-orcid":false,"given":"Chia-Chun","family":"Cheng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5092-993X","authenticated-orcid":false,"given":"Shang-Hong","family":"Lai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,27]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Behl, A., Hosseini Jafari, O., Karthik Mustikovela, S., Abu Alhaija, H., Rother, C., Geiger, A.: Bounding boxes, segmentations and object coordinates: how important is recognition for 3D scene flow estimation in autonomous driving scenarios? In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2574\u20132583 (2017)","DOI":"10.1109\/ICCV.2017.281"},{"key":"27_CR2","doi-asserted-by":"crossref","unstructured":"Guerry, J., Boulch, A., Le Saux, B., Moras, J., Plyer, A., Filliat, D.: Snapnet-r: Consistent 3D multi-view semantic labeling for robotics. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 669\u2013678 (2017)","DOI":"10.1109\/ICCVW.2017.85"},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., Li, H.: PointRCNN: 3D object proposal generation and detection from point cloud. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013779 (2019)","DOI":"10.1109\/CVPR.2019.00086"},{"key":"27_CR4","doi-asserted-by":"crossref","unstructured":"Brazil, G., Liu, X.: M3D-RPN: monocular 3D region proposal network for object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 9287\u20139296 (2019)","DOI":"10.1109\/ICCV.2019.00938"},{"key":"27_CR5","unstructured":"You, Y., et al.: Pseudo-lidar++: accurate depth for 3D object detection in autonomous driving. arXiv preprint arXiv:1906.06310 (2019)"},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Casser, V., Pirk, S., Mahjourian, R., Angelova, A.: Depth prediction without the sensors: Leveraging structure for unsupervised learning from monocular videos. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 8001\u20138008 (2019)","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Mousavian, A., Anguelov, D., Flynn, J., Kosecka, J.: 3D bounding box estimation using deep learning and geometry. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7074\u20137082 (2017)","DOI":"10.1109\/CVPR.2017.597"},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Chen, X., Kundu, K., Zhang, Z., Ma, H., Fidler, S., Urtasun, R.: Monocular 3D object detection for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2147\u20132156 (2016)","DOI":"10.1109\/CVPR.2016.236"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Ding, M., et al.: Learning depth-guided convolutions for monocular 3D object detection. arXiv preprint arXiv:1912.04799 (2019)","DOI":"10.1109\/CVPR42600.2020.01169"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Choi, M., Kim, H., Han, B., Xu, N., Lee, K.M.: Channel attention is all you need for video frame interpolation, AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6693"},{"key":"27_CR11","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13, 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? the kitti vision benchmark suite. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 3354\u20133361. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Tuzel, O.: Voxelnet: End-to-end learning for point cloud based 3D object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4490\u20134499 (2018)","DOI":"10.1109\/CVPR.2018.00472"},{"key":"27_CR14","unstructured":"Wang, B., An, J., Cao, J.: Voxel-FPN: multi-scale voxel feature aggregation in 3D object detection from point clouds. arXiv preprint arXiv:1907.05286 (2019)"},{"key":"27_CR15","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: Pointnet: deep learning on point sets for 3D classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 652\u2013660 (2017)"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Qi, C.R., Liu, W., Wu, C., Su, H., Guibas, L.J.: Frustum pointnets for 3D object detection from RGB-d data. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 918\u2013927 (2018)","DOI":"10.1109\/CVPR.2018.00102"},{"key":"27_CR17","doi-asserted-by":"crossref","unstructured":"Ma, X., Wang, Z., Li, H., Zhang, P., Ouyang, W., Fan, X.: Accurate monocular 3D object detection via color-embedded 3D reconstruction for autonomous driving. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 6851\u20136860 (2019)","DOI":"10.1109\/ICCV.2019.00695"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chao, W.L., Garg, D., Hariharan, B., Campbell, M., Weinberger, K.Q.: Pseudo-lidar from visual depth estimation: bridging the gap in 3D object detection for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8445\u20138453 (2019)","DOI":"10.1109\/CVPR.2019.00864"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Weng, X., Kitani, K.: Monocular 3D object detection with pseudo-lidar point cloud. In: Proceedings of the IEEE International Conference on Computer Vision Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00114"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Z., Wu, Z., T\u00f3th, R.: Smoke: single-stage monocular 3D object detection via keypoint estimation. arXiv preprint arXiv:2002.10111 (2020)","DOI":"10.1109\/CVPRW50498.2020.00506"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Qin, Z., Wang, J., Lu, Y.: Monogrnet: a geometric reasoning network for monocular 3D object localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 8851\u20138858 (2019)","DOI":"10.1609\/aaai.v33i01.33018851"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Fu, H., Gong, M., Wang, C., Batmanghelich, K., Tao, D.: Deep ordinal regression network for monocular depth estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2002\u20132011 (2018)","DOI":"10.1109\/CVPR.2018.00214"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"27_CR24","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"27_CR25","unstructured":"Chen, X., et al.: 3D object proposals for accurate object class detection. In: Advances in Neural Information Processing Systems, pp. 424\u2013432 (2015)"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bulo, S.R., Porzi, L., L\u00f3pez-Antequera, M., Kontschieder, P.: Disentangling monocular 3D object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1991\u20131999 (2019)","DOI":"10.1109\/ICCV.2019.00208"},{"key":"27_CR27","doi-asserted-by":"crossref","unstructured":"Smith, L.N., Topin, N.: Super-convergence: very fast training of neural networks using large learning rates. In: Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications, vol. 11006, p. 1100612, International Society for Optics and Photonics (2019)","DOI":"10.1117\/12.2520589"},{"key":"27_CR28","doi-asserted-by":"crossref","unstructured":"Naiden, A., Paunescu, V., Kim, G., Jeon, B., Leordeanu, M.: Shift R-CNN: deep monocular 3D object detection with closed-form geometric constraints. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 61\u201365. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8803397"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Ku, J., Pon, A.D., Waslander, S.L.: Monocular 3D object detection leveraging accurate proposals and shape reconstruction. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 11867\u201311876 (2019)","DOI":"10.1109\/CVPR.2019.01214"},{"key":"27_CR30","doi-asserted-by":"crossref","unstructured":"Xu, B., Chen, Z.: Multi-level fusion based 3D object detection from monocular images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2345\u20132353 (2018)","DOI":"10.1109\/CVPR.2018.00249"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2020"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-69525-5_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,2,26]],"date-time":"2021-02-26T16:54:22Z","timestamp":1614358462000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-69525-5_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030695248","9783030695255"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-69525-5_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"27 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 December 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/accv2020.kyoto\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"768","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"254","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}