{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T16:56:56Z","timestamp":1781974616297,"version":"3.54.5"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,8,20]],"date-time":"2023-08-20T00:00:00Z","timestamp":1692489600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,20]],"date-time":"2023-08-20T00:00:00Z","timestamp":1692489600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s00138-023-01435-w","type":"journal-article","created":{"date-parts":[[2023,8,20]],"date-time":"2023-08-20T15:01:29Z","timestamp":1692543689000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["RGB-LiDAR fusion for accurate 2D and 3D object detection"],"prefix":"10.1007","volume":"34","author":[{"given":"Morteza","family":"Mousa-Pasandi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tianran","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2898-8834","authenticated-orcid":false,"given":"Yahya","family":"Massoud","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Robert","family":"Lagani\u00e8re","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,8,20]]},"reference":[{"key":"1435_CR1","unstructured":"Huang, K., Shi, B., Li, X., Li, X., Huang, S., Li, Y.: Multi-modal sensor fusion for auto driving perception: a survey. arXiv abs\/2202.02703 (2022)"},{"key":"1435_CR2","doi-asserted-by":"crossref","unstructured":"Massoud, Y.: Sensor fusion for 3d object detection for autonomous vehicles. Master\u2019s thesis, Universit\u00e9 d\u2019Ottawa \/ University of Ottawa (2021)","DOI":"10.36227\/techrxiv.21506124.v1"},{"key":"1435_CR3","doi-asserted-by":"crossref","unstructured":"Liang, M., Yang, B., Chen, Y., Hu, R., Urtasun, R.: Multi-task multi-sensor fusion for 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7345\u20137353 (2019)","DOI":"10.1109\/CVPR.2019.00752"},{"key":"1435_CR4","doi-asserted-by":"crossref","unstructured":"Ku, J., Mozifian, M., Lee, J., Harakeh, A., Waslander, S.L.: Joint 3d proposal generation and object detection from view aggregation. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 1\u20138 (2018)","DOI":"10.1109\/IROS.2018.8594049"},{"key":"1435_CR5","doi-asserted-by":"crossref","unstructured":"Liang, M., Yang, B., Wang, S., Urtasun, R.: Deep continuous fusion for multi-sensor 3d object detection. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 641\u2013656 (2018)","DOI":"10.1007\/978-3-030-01270-0_39"},{"key":"1435_CR6","doi-asserted-by":"crossref","unstructured":"Wang, S., Suo, S., Ma, W.-C., Pokrovsky, A., Urtasun, R.: Deep parametric continuous convolutional neural networks. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2018)","DOI":"10.1109\/CVPR.2018.00274"},{"key":"1435_CR7","doi-asserted-by":"crossref","unstructured":"Qi, C.R., Liu, W., Wu, C., Su, H., Guibas, L.J.: Frustum pointnets for 3d object detection from RGB-D data. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 918\u2013927 (2018)","DOI":"10.1109\/CVPR.2018.00102"},{"key":"1435_CR8","doi-asserted-by":"crossref","unstructured":"Chen, X., Ma, H., Wan, J., Li, B., Xia, T.: Multi-view 3d object detection network for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1907\u20131915 (2017)","DOI":"10.1109\/CVPR.2017.691"},{"key":"1435_CR9","doi-asserted-by":"crossref","unstructured":"Duan, K., Bai, S., Xie, L., Qi, H., Huang, Q., Tian, Q.: Centernet: Keypoint triplets for object detection. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00667"},{"key":"1435_CR10","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade R-CNN: delving into high quality object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6154\u20136162 (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"key":"1435_CR11","unstructured":"Chen, X., Gupta, A.: An implementation of faster rcnn with study for region sampling. arXiv preprint arXiv:1702.02138 (2017)"},{"key":"1435_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: 2017 IEEE International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"1435_CR13","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in neural information processing systems 28 (2015)"},{"key":"1435_CR14","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: 2015 IEEE International Conference on Computer Vision (ICCV) (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"1435_CR15","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"1435_CR16","doi-asserted-by":"crossref","unstructured":"Feng, C., Zhong, Y., Gao, Y., Scott, M.R., Huang, W.: Tood: task-aligned one-stage object detection. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3490\u20133499 (2021). IEEE Computer Society","DOI":"10.1109\/ICCV48922.2021.00349"},{"key":"1435_CR17","doi-asserted-by":"crossref","unstructured":"Wang, C.-Y., Bochkovskiy, A., Liao, H.-Y.M.: YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: arXiv Preprint arXiv:2207.02696 (2022)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"1435_CR18","doi-asserted-by":"crossref","unstructured":"Wang, C.-Y., Bochkovskiy, A., Liao, H.-Y.M.: Scaled-YOLOv4: scaling cross stage partial network. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 2021 (pp. 13029-13038).(2021)","DOI":"10.1109\/CVPR46437.2021.01283"},{"key":"1435_CR19","doi-asserted-by":"crossref","unstructured":"Law, H., & Deng, J.: CornerNet: Detecting Objects as Paired Keypoints. Int. J. Comput Vision. https:\/\/doi.org\/10.1007\/s11263-019-01204-1 (2020)","DOI":"10.1007\/s11263-019-01204-1"},{"key":"1435_CR20","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"1435_CR21","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., Berg, A.C.: SSD: single shot MultiBox detector (2015)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1435_CR22","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1435_CR23","doi-asserted-by":"publisher","unstructured":"Kim, J., Choi, J., Kim, Y., Koh, J., Chung, C.C., Choi, J.W.: Robust camera lidar sensor fusion via deep gated information fusion network. In: 2018 IEEE Intelligent Vehicles Symposium (IV), pp. 1620\u20131625 (2018). https:\/\/doi.org\/10.1109\/IVS.2018.8500711","DOI":"10.1109\/IVS.2018.8500711"},{"key":"1435_CR24","doi-asserted-by":"crossref","unstructured":"Condat, R., Rogozan, A., Bensrhair, A.: GFD-retina: gated fusion double retinanet for multimodal 2D road object detection. In: 2020 IEEE 23rd International Conference on Intelligent Transportation Systems (ITSC), pp. 1\u20136 (2020)","DOI":"10.1109\/ITSC45102.2020.9294447"},{"key":"1435_CR25","doi-asserted-by":"publisher","unstructured":"Du, X., Ang, M.H., Karaman, S., Rus, D.: A general pipeline for 3d detection of vehicles. In: 2018 IEEE International Conference on Robotics and Automation (ICRA), pp. 3194\u20133200 (2018). https:\/\/doi.org\/10.1109\/ICRA.2018.8461232","DOI":"10.1109\/ICRA.2018.8461232"},{"key":"1435_CR26","doi-asserted-by":"crossref","unstructured":"Xu, D., Anguelov, D., Jain, A.: Pointfusion: deep sensor fusion for 3d bounding box estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 244\u2013253 (2018)","DOI":"10.1109\/CVPR.2018.00033"},{"key":"1435_CR27","doi-asserted-by":"crossref","unstructured":"Wu, X., Peng, L., Yang, H., Xie, L., Huang, C., Deng, C., Liu, H., Cai, D.: Sparse fuse dense: towards high quality 3d detection with depth completion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5418\u20135427 (2022)","DOI":"10.1109\/CVPR52688.2022.00534"},{"key":"1435_CR28","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3189778","author":"H Zhu","year":"2022","unstructured":"Zhu, H., Deng, J., Zhang, Y., Ji, J., Mao, Q., Li, H., Zhang, Y.: VPFNet: improving 3d object detection with virtual point based lidar and stereo data fusion. IEEE Trans. Multimed. (2022). https:\/\/doi.org\/10.1109\/TMM.2022.3189778","journal-title":"IEEE Trans. Multimed."},{"key":"1435_CR29","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1435_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1435_CR31","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"1435_CR32","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Fan, J., Tao, D.: Multi-modal factorized bilinear pooling with co-attention learning for visual question answering. In: ICCV, pp. 1839\u20131848 (2017)","DOI":"10.1109\/ICCV.2017.202"},{"key":"1435_CR33","doi-asserted-by":"crossref","unstructured":"Rahman, M.A., Lagani\u00e8re, R.: Mid-level fusion for end-to-end temporal activity detection in untrimmed video. In: BMVC (2020)","DOI":"10.1109\/CRV50864.2020.00035"},{"issue":"11","key":"1435_CR34","doi-asserted-by":"publisher","first-page":"3051","DOI":"10.1007\/s11263-021-01515-2","volume":"129","author":"C Yu","year":"2021","unstructured":"Yu, C., Gao, C., Wang, J., Yu, G., Shen, C., Sang, N.: BiSeNet v2: Bilateral network with guided aggregation for real-time semantic segmentation. Int. J. Comput. Vis. 129(11), 3051\u20133068 (2021)","journal-title":"Int. J. Comput. Vis."},{"issue":"12","key":"1435_CR35","doi-asserted-by":"publisher","first-page":"4722","DOI":"10.1109\/TCSVT.2021.3100848","volume":"31","author":"J Deng","year":"2021","unstructured":"Deng, J., Zhou, W., Zhang, Y., Li, H.: From multi-view to hollow-3d: hallucinated hollow-3d R-CNN for 3d object detection. IEEE Trans. Circuits Syst. Video Technol. 31(12), 4722\u20134734 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1435_CR36","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., Urtasun, R.: Are we ready for autonomous driving? the kitti vision benchmark suite. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2012)","DOI":"10.1109\/CVPR.2012.6248074"},{"issue":"5","key":"1435_CR37","doi-asserted-by":"publisher","first-page":"1259","DOI":"10.1109\/TPAMI.2017.2706685","volume":"40","author":"X Chen","year":"2017","unstructured":"Chen, X., Kundu, K., Zhu, Y., Ma, H., Fidler, S., Urtasun, R.: 3d object proposals using stereo imagery for accurate object class detection. IEEE Trans. Pattern Anal. Mach. Intell. 40(5), 1259\u20131272 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1435_CR38","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-023-01435-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-023-01435-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-023-01435-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,9]],"date-time":"2023-09-09T08:05:52Z","timestamp":1694246752000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-023-01435-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,20]]},"references-count":38,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["1435"],"URL":"https:\/\/doi.org\/10.1007\/s00138-023-01435-w","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,20]]},"assertion":[{"value":"15 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 July 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 July 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 August 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"86"}}