{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T14:00:27Z","timestamp":1772805627457,"version":"3.50.1"},"reference-count":73,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2021,4,1]],"date-time":"2021-04-01T00:00:00Z","timestamp":1617235200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,4,1]],"date-time":"2021-04-01T00:00:00Z","timestamp":1617235200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s11263-021-01456-w","type":"journal-article","created":{"date-parts":[[2021,4,1]],"date-time":"2021-04-01T11:03:28Z","timestamp":1617275008000},"page":"1857-1874","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":20,"title":["Vote-Based 3D Object Detection with Context Modeling and SOB-3DNMS"],"prefix":"10.1007","volume":"129","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9901-0396","authenticated-orcid":false,"given":"Qian","family":"Xie","sequence":"first","affiliation":[]},{"given":"Yu-Kun","family":"Lai","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Zhoutao","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yiming","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Kai","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,4,1]]},"reference":[{"key":"1456_CR1","doi-asserted-by":"crossref","unstructured":"Atzmon, M., Maron, H., & Lipman, Y. (2018). Point convolutional neural networks by extension operators. arXiv preprint arXiv:1803.10091.","DOI":"10.1145\/3197517.3201301"},{"key":"1456_CR2","doi-asserted-by":"crossref","unstructured":"Bodla, N., Singh, B., Chellappa, R., & Davis, L. S. (2017). Soft-nms\u2013improving object detection with one line of code. In Proceedings of the IEEE international conference on computer vision (pp. 5561\u20135569).","DOI":"10.1109\/ICCV.2017.593"},{"key":"1456_CR3","doi-asserted-by":"crossref","unstructured":"Cao, Y., Xu, J., Lin, S., Wei, F., & Hu, H. (2019). Gcnet: Non-local networks meet squeeze-excitation networks and beyond. In Proceedings of the IEEE international conference on computer vision workshops.","DOI":"10.1109\/ICCVW.2019.00246"},{"key":"1456_CR4","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., & Zagoruyko, S. (2020). End-to-end object detection with transformers. arXiv preprint arXiv:2005.12872.","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1456_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Z., Huang, S., & Tao, D. (2018). Context refinement for object detection. In Proceedings of the European conference on computer vision (ECCV) (pp. 71\u201386).","DOI":"10.1007\/978-3-030-01237-3_5"},{"key":"1456_CR6","doi-asserted-by":"crossref","unstructured":"Chen, J., Lei, B., Song, Q., Ying, H., Chen, DZ., & Wu, J. (2020). A hierarchical graph network for 3d object detection on point clouds. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 392\u2013401).","DOI":"10.1109\/CVPR42600.2020.00047"},{"key":"1456_CR7","doi-asserted-by":"crossref","unstructured":"Choy, C., Gwak, J., & Savarese, S. (2019). 4d spatio-temporal convnets: Minkowski convolutional neural networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3075\u20133084).","DOI":"10.1109\/CVPR.2019.00319"},{"key":"1456_CR8","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, AX., Savva, M., Halber, M., Funkhouser, T., & Nie\u00dfner, M. (2017). ScanNet: Richly-annotated 3d reconstructions of indoor scenes. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 5828\u20135839).","DOI":"10.1109\/CVPR.2017.261"},{"key":"1456_CR9","doi-asserted-by":"crossref","unstructured":"Dalal, N., & Triggs, B. (2005). Histograms of oriented gradients for human detection. In 2005 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201905) (Vol.\u00a01, pp. 886\u2013893). IEEE.","DOI":"10.1109\/CVPR.2005.177"},{"key":"1456_CR10","doi-asserted-by":"crossref","unstructured":"Deng, H., Birdal, T., & Ilic, S. (2018). Ppfnet: Global context aware local features for robust 3D point matching. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 195\u2013205).","DOI":"10.1109\/CVPR.2018.00028"},{"key":"1456_CR11","doi-asserted-by":"crossref","unstructured":"Engelmann, F., Bokeloh, M., Fathi, A., Leibe, B., & Nie\u00dfner, M. (2020). 3d-mpa: Multi-proposal aggregation for 3d semantic instance segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9031\u20139040).","DOI":"10.1109\/CVPR42600.2020.00905"},{"key":"1456_CR12","doi-asserted-by":"crossref","unstructured":"Engelmann, F., Kontogianni, T., Hermans, A., & Leibe, B. (2017). Exploring spatial context for 3D semantic segmentation of point clouds. In Proceedings of the IEEE international conference on computer vision (pp. 716\u2013724).","DOI":"10.1109\/ICCVW.2017.90"},{"key":"1456_CR13","doi-asserted-by":"crossref","unstructured":"Fu, J., Liu, J., Tian, H., Li, Y., Bao, Y., Fang, Z., & Lu, H. (2019). Dual attention network for scene segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3146\u20133154).","DOI":"10.1109\/CVPR.2019.00326"},{"key":"1456_CR14","doi-asserted-by":"crossref","unstructured":"He, C., Zeng, H., Huang, J., Hua, XS., & Zhang, L. (2020). Structure aware single-stage 3d object detection from point cloud. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 11,873\u201311,882).","DOI":"10.1109\/CVPR42600.2020.01189"},{"key":"1456_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask R-CNN. In Proceedings of the IEEE international conference on computer vision (pp. 2961\u20132969).","DOI":"10.1109\/ICCV.2017.322"},{"key":"1456_CR16","doi-asserted-by":"crossref","unstructured":"He, Y., Zhang, X., Savvides, M., & Kitani, K. (2018). Softer-nms: Rethinking bounding box regression for accurate object detection. arXiv preprint arXiv:1809.08545","DOI":"10.1109\/CVPR.2019.00300"},{"key":"1456_CR17","doi-asserted-by":"crossref","unstructured":"Hou, J., Dai, A., & Nie\u00dfner, M. (2019). 3d-sis: 3D semantic instance segmentation of RGB-D scans. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4421\u20134430).","DOI":"10.1109\/CVPR.2019.00455"},{"key":"1456_CR18","doi-asserted-by":"crossref","unstructured":"Hu, H., Gu. J., Zhang. Z., Dai. J., & Wei, Y. (2018a). Relation networks for object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3588\u20133597).","DOI":"10.1109\/CVPR.2018.00378"},{"key":"1456_CR19","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., & Sun, G. (2018b). Squeeze-and-excitation networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 7132\u20137141).","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1456_CR20","doi-asserted-by":"crossref","unstructured":"Hu, S. M., Cai, J. X., & Lai, Y. K. (2018c). Semantic labeling and instance segmentation of 3D point clouds using patch context analysis and multiscale processing. IEEE Transactions on Visualization and Computer Graphics, 26, 2485\u20132498.","DOI":"10.1109\/TVCG.2018.2889944"},{"key":"1456_CR21","doi-asserted-by":"crossref","unstructured":"Jiang, L., Zhao, H., Shi, S., Liu, S., Fu, CW., & Jia, J. (2020). Pointgroup: Dual-set point grouping for 3d instance segmentation. arXiv preprint arXiv:2004.01658.","DOI":"10.1109\/CVPR42600.2020.00492"},{"key":"1456_CR22","doi-asserted-by":"crossref","unstructured":"Lahoud, J., & Ghanem, B. (2017). 2D-driven 3D object detection in RGB-D images. In Proceedings of the IEEE international conference on computer vision (pp. 4622\u20134630).","DOI":"10.1109\/ICCV.2017.495"},{"key":"1456_CR23","doi-asserted-by":"crossref","unstructured":"Lang, A. H., Vora, S., Caesar, H., Zhou, L., Yang, J., & Beijbom, O. (2019). Pointpillars: Fast encoders for object detection from point clouds. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 12,697\u201312,705).","DOI":"10.1109\/CVPR.2019.01298"},{"key":"1456_CR24","unstructured":"Li, J., Luo, S., Zhu, Z., Dai, H., Krylov, A. S., Ding, Y., & Shao, L. (2020a). 3d iou-net: Iou guided 3d object detector for point clouds. arXiv preprint arXiv:2004.04962."},{"key":"1456_CR25","unstructured":"Li, Y., Bu, R., Sun, M., Wu, W., Di, X., & Chen, B. (2018). PointCNN: Convolution on x-transformed points. In Advances in neural information processing systems (pp. 820\u2013830)."},{"key":"1456_CR26","doi-asserted-by":"crossref","unstructured":"Li, Y., Ma, L., Tan, W., Sun, C., Cao, D., & Li, J. (2020b). Grnet: Geometric relation network for 3d object detection from point clouds. ISPRS Journal of Photogrammetry and Remote Sensing, 165, 43\u201353.","DOI":"10.1016\/j.isprsjprs.2020.05.008"},{"key":"1456_CR27","doi-asserted-by":"crossref","unstructured":"Liu, S., Huang, D., & Wang, Y. (2019a). Adaptive nms: Refining pedestrian detection in a crowd. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6459\u20136468).","DOI":"10.1109\/CVPR.2019.00662"},{"key":"1456_CR28","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C. Y., & Berg, A. C. (2016). Ssd: Single shot multibox detector. In European conference on computer vision (pp. 21\u201337). Springer.","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1456_CR29","doi-asserted-by":"crossref","unstructured":"Liu, Y., Fan, B., Xiang, S., & Pan, C. (2019b). Relation-shape convolutional neural network for point cloud analysis. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 8895\u20138904).","DOI":"10.1109\/CVPR.2019.00910"},{"key":"1456_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Y., Wang, R., Shan, S., & Chen, X. (2018). Structure inference net: Object detection using scene-level context and instance-level relationships. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6985\u20136994).","DOI":"10.1109\/CVPR.2018.00730"},{"key":"1456_CR31","doi-asserted-by":"crossref","unstructured":"McCormac, J., Clark, R., Bloesch, M., Davison, A., & Leutenegger, S. (2018). Fusion++: Volumetric object-level SLAM. In 2018 international conference on 3D vision (3DV) (pp. 32\u201341). IEEE.","DOI":"10.1109\/3DV.2018.00015"},{"key":"1456_CR32","doi-asserted-by":"crossref","unstructured":"Mottaghi, R., Chen, X., Liu, X., Cho, N. G., Lee, S. W., Fidler, S., Urtasun, R., & Yuille, A. (2014). The role of context for object detection and semantic segmentation in the wild. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 891\u2013898).","DOI":"10.1109\/CVPR.2014.119"},{"key":"1456_CR33","doi-asserted-by":"crossref","unstructured":"Najibi, M., Lai, G., Kundu, A., Lu, Z., Rathod, V., Funkhouser, T., Pantofaru, C., Ross, D., Davis, L. S., & Fathi, A. (2020). Dops: Learning to detect 3d objects and predict their 3d shapes. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 11,913\u201311,922).","DOI":"10.1109\/CVPR42600.2020.01193"},{"key":"1456_CR34","doi-asserted-by":"crossref","unstructured":"Paigwar, A., Erkent, O., Wolf, C., & Laugier, C. (2019). Attentional PointNet for 3D-object detection in point clouds. In Proceedings of the IEEE conference on computer vision and pattern recognition workshops.","DOI":"10.1109\/CVPRW.2019.00169"},{"key":"1456_CR35","doi-asserted-by":"crossref","unstructured":"Qi, C. R., Chen, X., Litany, O., & Guibas, L. J. (2020). Imvotenet: Boosting 3d object detection in point clouds with image votes. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 4404\u20134413).","DOI":"10.1109\/CVPR42600.2020.00446"},{"key":"1456_CR36","doi-asserted-by":"crossref","unstructured":"Qi, C. R., Litany, O., He, K., & Guibas, L. J. (2019). Deep Hough voting for 3D object detection in point clouds. arXiv preprint arXiv:1904.09664.","DOI":"10.1109\/ICCV.2019.00937"},{"key":"1456_CR37","doi-asserted-by":"crossref","unstructured":"Qi, C. R., Liu, W., Wu, C., Su, H., & Guibas, L. J. (2018). Frustum PointNets for 3D object detection from RGB-D data. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 918\u2013927).","DOI":"10.1109\/CVPR.2018.00102"},{"key":"1456_CR38","unstructured":"Qi, C. R., Su, H., Mo, K., & Guibas, L. J. (2017a). PointNet: Deep learning on point sets for 3D classification and segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 652\u2013660)."},{"key":"1456_CR39","unstructured":"Qi, C. R., Yi, L., Su, H., & Guibas, L. J. (2017b). PointNet++: Deep hierarchical feature learning on point sets in a metric space. In Advances in neural information processing systems (pp. 5099\u20135108)."},{"key":"1456_CR40","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., & Farhadi, A. (2016). You only look once: Unified, real-time object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 779\u2013788).","DOI":"10.1109\/CVPR.2016.91"},{"key":"1456_CR41","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards real-time object detection with region proposal networks. In Advances in neural information processing systems (pp. 91\u201399)."},{"key":"1456_CR42","doi-asserted-by":"crossref","unstructured":"Ren, Z., & Sudderth, E. B. (2016). Three-dimensional object detection and layout prediction using clouds of oriented gradients. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1525\u20131533).","DOI":"10.1109\/CVPR.2016.169"},{"key":"1456_CR43","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., & Savarese, S. (2019). Generalized intersection over union: A metric and a loss for bounding box regression. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 658\u2013666).","DOI":"10.1109\/CVPR.2019.00075"},{"key":"1456_CR44","unstructured":"Salscheider, N. O. (2020). Featurenms: Non-maximum suppression by learning feature embeddings. arXiv preprint arXiv:2002.07662."},{"key":"1456_CR45","doi-asserted-by":"crossref","unstructured":"Shi, S., Guo, C., Jiang, L., Wang, Z., Shi, J., Wang, X., & Li, H. (2020). Pv-rcnn: Point-voxel feature set abstraction for 3d object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR42600.2020.01054"},{"key":"1456_CR46","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., & Li, H. (2019a). PointRCNN: 3D object proposal generation and detection from point cloud. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013779).","DOI":"10.1109\/CVPR.2019.00086"},{"key":"1456_CR47","doi-asserted-by":"crossref","unstructured":"Shi, W., & Rajkumar, R. (2020). Point-gnn: Graph neural network for 3d object detection in a point cloud. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 1711\u20131719).","DOI":"10.1109\/CVPR42600.2020.00178"},{"key":"1456_CR48","doi-asserted-by":"crossref","unstructured":"Shi, Y., Chang, AX., Wu, Z., Savva, M., & Xu, K. (2019b). Hierarchy denoising recursive autoencoders for 3D scene layout prediction. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1771\u20131780).","DOI":"10.1109\/CVPR.2019.00187"},{"key":"1456_CR49","doi-asserted-by":"crossref","unstructured":"Song, S., Lichtenberg, S. P., & Xiao, J. (2015). SUN RGB-D: A RGB-D scene understanding benchmark suite. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 567\u2013576).","DOI":"10.1109\/CVPR.2015.7298655"},{"key":"1456_CR50","doi-asserted-by":"crossref","unstructured":"Song, S., & Xiao, J. (2016). Deep sliding shapes for amodal 3D object detection in RGB-D images. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 808\u2013816).","DOI":"10.1109\/CVPR.2016.94"},{"key":"1456_CR51","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, \u0141., & Polosukhin, I. (2017). Attention is all you need. In Advances in neural information processing systems (pp. 5998\u20136008)."},{"key":"1456_CR52","doi-asserted-by":"crossref","unstructured":"Wang, C., Xu, D., Zhu, Y., Mart\u00edn-Mart\u00edn, R., Lu, C., Fei-Fei, L., & Savarese, S. (2019). DenseFusion: 6D object pose estimation by iterative dense fusion. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3343\u20133352).","DOI":"10.1109\/CVPR.2019.00346"},{"key":"1456_CR53","unstructured":"Wang, G., Tian, B., Ai, Y., Xu, T., Chen, L., & Cao, D. (2020). Centernet3d: An anchor free object detector for autonomous driving. arXiv preprint arXiv:2007.07214."},{"issue":"4","key":"1456_CR54","first-page":"72","volume":"36","author":"PS Wang","year":"2017","unstructured":"Wang, P. S., Liu, Y., Guo, Y. X., Sun, C. Y., & Tong, X. (2017). O-CNN: Octree-based convolutional neural networks for 3D shape analysis. ACM Transactions on Graphics (TOG), 36(4), 72.","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"1456_CR55","doi-asserted-by":"crossref","unstructured":"Wang, T., He, X., & Barnes, N. (2013). Learning structured Hough voting for joint object detection and occlusion reasoning. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1790\u20131797).","DOI":"10.1109\/CVPR.2013.234"},{"key":"1456_CR56","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., & He, K. (2018). Non-local neural networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 7794\u20137803).","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1456_CR57","doi-asserted-by":"crossref","unstructured":"Xie, Q., Lai, YK., Wu, J., Wang, Z., Zhang, Y., Xu, K., & Wang, J. (2020). Mlcvnet: Multi-level context votenet for 3d object detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 10,447\u201310,456).","DOI":"10.1109\/CVPR42600.2020.01046"},{"key":"1456_CR58","doi-asserted-by":"crossref","unstructured":"Xie, S., Liu, S., Chen, Z., & Tu, Z. (2018). Attentional shapecontextnet for point cloud recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp 4606\u20134615).","DOI":"10.1109\/CVPR.2018.00484"},{"key":"1456_CR59","doi-asserted-by":"crossref","unstructured":"Xu, D., Anguelov, D., & Jain, A. (2018). Pointfusion: Deep sensor fusion for 3d bounding box estimation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 244\u2013253).","DOI":"10.1109\/CVPR.2018.00033"},{"key":"1456_CR60","doi-asserted-by":"crossref","unstructured":"Yang, J., Lu, J., Lee, S., Batra, D., & Parikh, D. (2018). Graph r-cnn for scene graph generation. In Proceedings of the European conference on computer vision (ECCV) (pp. 670\u2013685).","DOI":"10.1007\/978-3-030-01246-5_41"},{"key":"1456_CR61","doi-asserted-by":"crossref","unstructured":"Yang, Z., Sun, Y., Liu, S., & Jia, J. (2020). 3dssd: Point-based 3d single stage object detector. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 11,040\u201311,048).","DOI":"10.1109\/CVPR42600.2020.01105"},{"key":"1456_CR62","doi-asserted-by":"crossref","unstructured":"Ye, X., Li, J., Huang, H., Du, L., & Zhang, X. (2018). 3D recurrent neural networks with context fusion for point cloud semantic segmentation. In Proceedings of the European conference on computer vision (ECCV) (pp. 403\u2013417).","DOI":"10.1007\/978-3-030-01234-2_25"},{"key":"1456_CR63","doi-asserted-by":"crossref","unstructured":"Yi, L., Zhao, W., Wang, H., Sung, M., & Guibas, L. J. (2019). GSPN: Generative shape proposal network for 3D instance segmentation in point cloud. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3947\u20133956).","DOI":"10.1109\/CVPR.2019.00407"},{"key":"1456_CR64","unstructured":"Yin, T., Zhou, X., & Kr\u00e4henb\u00fchl, P. (2020). Center-based 3d object detection and tracking. arXiv preprint arXiv:2006.11275."},{"key":"1456_CR65","doi-asserted-by":"crossref","unstructured":"Yu, R., Chen, X., Morariu, V. I., & Davis, L. S. (2016). The role of context selection in object detection. arXiv preprint arXiv:1609.02948.","DOI":"10.5244\/C.30.133"},{"key":"1456_CR66","unstructured":"Yue, K., Sun, M., Yuan, Y., Zhou, F., Ding, E., & Xu, F. (2018). Compact generalized non-local network. In Advances in neural information processing systems (pp. 6510\u20136519)."},{"key":"1456_CR67","unstructured":"Zambaldi, V., Raposo, D., Santoro, A., Bapst, V., Li, Y., Babuschkin, I., Tuyls, K., Reichert, D., Lillicrap, T., Lockhart, E., et\u00a0al. (2018). Relational deep reinforcement learning. arXiv preprint arXiv:1806.01830."},{"key":"1456_CR68","doi-asserted-by":"crossref","unstructured":"Zhang, W., & Xiao, C. (2019). PCAN: 3D attention map learning using contextual information for point cloud based retrieval. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 12,436\u201312,445).","DOI":"10.1109\/CVPR.2019.01272"},{"key":"1456_CR69","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Bai, M., Kohli, P., Izadi, S., & Xiao, J. (2017). Deepcontext: Context-encoding neural pathways for 3D holistic scene understanding. In Proceedings of the IEEE International conference on computer vision (pp. 1192\u20131201).","DOI":"10.1109\/ICCV.2017.135"},{"key":"1456_CR70","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Song, S., Tan, P., & Xiao, J. (2014). Panocontext: A whole-room 3D context model for panoramic scene understanding. In European conference on computer vision (pp. 668\u2013686). Springer.","DOI":"10.1007\/978-3-319-10599-4_43"},{"key":"1456_CR71","doi-asserted-by":"crossref","unstructured":"Zhang, H., Zhang, H., Wang, C., & Xie, J. (2019). Co-occurrent features in semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 548\u2013557).","DOI":"10.1109\/CVPR.2019.00064"},{"key":"1456_CR72","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, P., Liu, W., Li, J., Ye, R., & Ren, D. (2020). Distance-iou loss: Faster and better learning for bounding box regression. In AAAI (pp. 12,993\u201313,000).","DOI":"10.1609\/aaai.v34i07.6999"},{"key":"1456_CR73","doi-asserted-by":"crossref","unstructured":"Zhou, Y., & Tuzel, O. (2018). Voxelnet: End-to-end learning for point cloud based 3d object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 4490\u20134499).","DOI":"10.1109\/CVPR.2018.00472"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01456-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-021-01456-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01456-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,23]],"date-time":"2022-12-23T03:54:44Z","timestamp":1671767684000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-021-01456-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,4,1]]},"references-count":73,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["1456"],"URL":"https:\/\/doi.org\/10.1007\/s11263-021-01456-w","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,4,1]]},"assertion":[{"value":"14 July 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 March 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 April 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}