{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T15:54:10Z","timestamp":1774540450756,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"25","license":[{"start":{"date-parts":[[2021,9,12]],"date-time":"2021-09-12T00:00:00Z","timestamp":1631404800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,9,12]],"date-time":"2021-09-12T00:00:00Z","timestamp":1631404800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1007\/s11042-021-11506-7","type":"journal-article","created":{"date-parts":[[2021,9,12]],"date-time":"2021-09-12T12:04:13Z","timestamp":1631448253000},"page":"35803-35813","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Stereo VoVNet-CNN for 3D object detection"],"prefix":"10.1007","volume":"81","author":[{"given":"Kaiqi","family":"Su","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7869-2404","authenticated-orcid":false,"given":"Weiqing","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meiqi","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,12]]},"reference":[{"key":"11506_CR1","doi-asserted-by":"crossref","unstructured":"Brazil G, Liu X (2019) M3d-rpn: Monocular 3d region proposal network for object detection. In: Proceedings of the IEEE international conference on computer vision, pp 9287\u20139296","DOI":"10.1109\/ICCV.2019.00938"},{"key":"11506_CR2","doi-asserted-by":"crossref","unstructured":"Chen X, Kundu K, Zhang Z, Ma H, Fidler S, Urtasun R (2016) Monocular 3d object detection for autonomous driving. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2147\u20132156","DOI":"10.1109\/CVPR.2016.236"},{"issue":"5","key":"11506_CR3","doi-asserted-by":"publisher","first-page":"1259","DOI":"10.1109\/TPAMI.2017.2706685","volume":"40","author":"X Chen","year":"2017","unstructured":"Chen X, Kundu K, Zhu Y, Ma H, Fidler S, Urtasun R (2017) 3d object proposals using stereo imagery for accurate object class detection. IEEE Trans Pattern Anal Mach Intell 40(5):1259\u20131272","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11506_CR4","doi-asserted-by":"crossref","unstructured":"Chen X, Ma H, Wan J, Li B, Xia T (2017) Multi-view 3d object detection network for autonomous driving. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1907\u20131915","DOI":"10.1109\/CVPR.2017.691"},{"key":"11506_CR5","doi-asserted-by":"crossref","unstructured":"Chen Y, Liu S, Shen X, Jia J (2019) Fast point r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 9775\u20139784","DOI":"10.1109\/ICCV.2019.00987"},{"key":"11506_CR6","doi-asserted-by":"crossref","unstructured":"Chen Y, Liu S, Shen X, Jia J (2020) Dsgn: Deep stereo geometry network for 3d object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12536\u201312545","DOI":"10.1109\/CVPR42600.2020.01255"},{"issue":"3","key":"11506_CR7","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1145\/3007787.3001177","volume":"44","author":"YH Chen","year":"2016","unstructured":"Chen YH, Emer J, Sze V (2016) Eyeriss: A spatial architecture for energy-efficient dataflow for convolutional neural networks. ACM SIGARCH Computer Architecture News 44(3):367\u2013379","journal-title":"ACM SIGARCH Computer Architecture News"},{"key":"11506_CR8","doi-asserted-by":"crossref","unstructured":"Chen Z, Cong R, Xu Q, Huang Q (2020) Dpanet: Depth potentiality-aware gated attention network for rgb-d salient object detection. IEEE Trans Image Process","DOI":"10.1109\/TIP.2020.3028289"},{"key":"11506_CR9","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) Imagenet: A large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition. IEEE, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"11506_CR10","doi-asserted-by":"crossref","unstructured":"Ding M, Huo Y, Yi H, Wang Z, Shi J, Lu Z, Luo P (2020) Learning depth-guided convolutions for monocular 3d object detection. In: Proceedings of the IEEE\/CVF Conference on computer vision and pattern recognition workshops, pp 1000\u20131001","DOI":"10.1109\/CVPR42600.2020.01169"},{"key":"11506_CR11","doi-asserted-by":"crossref","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? the kitti vision benchmark suite. In: 2012 IEEE conference on computer vision and pattern recognition. IEEE, pp 3354\u20133361","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"11506_CR12","unstructured":"Glorot X, Bordes A, Bengio Y (2011) Deep sparse rectifier neural networks. In: Proceedings of the fourteenth international conference on artificial intelligence and statistics, pp 315\u2013323"},{"key":"11506_CR13","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"11506_CR14","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2016.90"},{"key":"11506_CR15","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv:1502.03167"},{"key":"11506_CR16","unstructured":"J\u00f6rgensen E, Zach C, Kahl F (2019) Monocular 3d object detection and box fitting trained end-to-end using intersection-over-union loss. arXiv:1906.08070"},{"key":"11506_CR17","doi-asserted-by":"crossref","unstructured":"Ku J, Mozifian M, Lee J, Harakeh A, Waslander SL (2018) Joint 3d proposal generation and object detection from view aggregation. In: 2018 IEEE\/RSJ international conference on intelligent robots and systems (IROS). IEEE, pp 1\u20138","DOI":"10.1109\/IROS.2018.8594049"},{"key":"11506_CR18","doi-asserted-by":"crossref","unstructured":"Lee Y, Park J (2020) Centermask: Real-time anchor-free instance segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13906\u201313915","DOI":"10.1109\/CVPR42600.2020.01392"},{"key":"11506_CR19","doi-asserted-by":"crossref","unstructured":"Li B, Zhang T, Xia T (2016) Vehicle detection from 3d lidar using fully convolutional network. arXiv:1608.07916","DOI":"10.15607\/RSS.2016.XII.042"},{"key":"11506_CR20","doi-asserted-by":"crossref","unstructured":"Li P, Chen X, Shen S (2019) Stereo r-cnn based 3d object detection for autonomous driving. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7644\u20137652","DOI":"10.1109\/CVPR.2019.00783"},{"key":"11506_CR21","doi-asserted-by":"crossref","unstructured":"Lin TY, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"11506_CR22","doi-asserted-by":"crossref","unstructured":"Liu S, Huang D, et al. (2018) Receptive field block net for accurate and fast object detection. In: Proceedings of the European conference on computer vision (ECCV), pp 385\u2013400","DOI":"10.1007\/978-3-030-01252-6_24"},{"issue":"6","key":"11506_CR23","doi-asserted-by":"publisher","first-page":"6787","DOI":"10.1007\/s11042-018-6319-4","volume":"78","author":"Z Liu","year":"2019","unstructured":"Liu Z, Song T, Xie F (2019) Rgb-d image saliency detection from 3d perspective. Multimed Tools Appl 78(6):6787\u20136804","journal-title":"Multimed Tools Appl"},{"key":"11506_CR24","doi-asserted-by":"crossref","unstructured":"Mousavian A, Anguelov D, Flynn J, Kosecka J (2017) 3d bounding box estimation using deep learning and geometry. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7074\u20137082","DOI":"10.1109\/CVPR.2017.597"},{"key":"11506_CR25","doi-asserted-by":"crossref","unstructured":"Qi CR, Liu W, Wu C, Su H, Guibas LJ (2018) Frustum pointnets for 3d object detection from rgb-d data. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 918\u2013927","DOI":"10.1109\/CVPR.2018.00102"},{"key":"11506_CR26","unstructured":"Qi CR, Su H, Mo K, Guibas LJ (2017) Pointnet: Deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 652\u2013660"},{"key":"11506_CR27","unstructured":"Qi CR, Yi L, Su H, Guibas LJ (2017) Pointnet++: Deep hierarchical feature learning on point sets in a metric space. In: Advances in neural information processing systems, pp 5099\u20135108"},{"key":"11506_CR28","doi-asserted-by":"crossref","unstructured":"Qin Z, Wang J, Lu Y (2019) Monogrnet: A geometric reasoning network for monocular 3d object localization. In: Proceedings of the AAAI conference on artificial intelligence, vol 33, pp 8851\u20138858","DOI":"10.1609\/aaai.v33i01.33018851"},{"key":"11506_CR29","doi-asserted-by":"crossref","unstructured":"Qin Z, Wang J, Lu Y (2019) Triangulation learning network: from monocular to stereo 3d object detection. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, pp 7607\u20137615","DOI":"10.1109\/CVPR.2019.00780"},{"key":"11506_CR30","unstructured":"Roddick T, Kendall A, Cipolla R (2018) Orthographic feature transform for monocular 3d object detection. arXiv:1811.08188"},{"key":"11506_CR31","doi-asserted-by":"crossref","unstructured":"Shi S, Wang X, Li H (2019) Pointrcnn: 3d object proposal generation and detection from point cloud. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013779","DOI":"10.1109\/CVPR.2019.00086"},{"key":"11506_CR32","doi-asserted-by":"crossref","unstructured":"Simonelli A, Bulo SR, Porzi L, L\u00f3pez-Antequera M, Kontschieder P (2019) Disentangling monocular 3d object detection. In: Proceedings of the IEEE international conference on computer vision, pp 1991\u20131999","DOI":"10.1109\/ICCV.2019.00208"},{"key":"11506_CR33","doi-asserted-by":"crossref","unstructured":"Sun J, Chen L, Xie Y, Zhang S, Jiang Q, Zhou X, Bao H (2020) Disp r-cnn: Stereo 3d object detection via shape prior guided instance disparity estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10548\u201310557","DOI":"10.1109\/CVPR42600.2020.01056"},{"issue":"12","key":"11506_CR34","doi-asserted-by":"publisher","first-page":"14655","DOI":"10.1007\/s11042-017-5052-8","volume":"77","author":"H Wang","year":"2018","unstructured":"Wang H, Yan B, Wang X, Zhang Y, Yang Y (2018) Accurate saliency detection based on depth feature of 3d images. Multimed Tools Appl 77 (12):14655\u201314672","journal-title":"Multimed Tools Appl"},{"key":"11506_CR35","doi-asserted-by":"crossref","unstructured":"Xu B, Chen Z (2018) Multi-level fusion based 3d object detection from monocular images. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2345\u20132353","DOI":"10.1109\/CVPR.2018.00249"},{"key":"11506_CR36","doi-asserted-by":"crossref","unstructured":"Zhou Y, Tuzel O (2018) Voxelnet: End-to-end learning for point cloud based 3d object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4490\u20134499","DOI":"10.1109\/CVPR.2018.00472"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11506-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-021-11506-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11506-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,24]],"date-time":"2022-09-24T04:11:51Z","timestamp":1663992711000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-021-11506-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,12]]},"references-count":36,"journal-issue":{"issue":"25","published-print":{"date-parts":[[2022,10]]}},"alternative-id":["11506"],"URL":"https:\/\/doi.org\/10.1007\/s11042-021-11506-7","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,9,12]]},"assertion":[{"value":"30 September 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 June 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 September 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}