{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,5]],"date-time":"2026-04-05T21:49:07Z","timestamp":1775425747401,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T00:00:00Z","timestamp":1696809600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T00:00:00Z","timestamp":1696809600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61973029"],"award-info":[{"award-number":["61973029"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62273034"],"award-info":[{"award-number":["62273034"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076026"],"award-info":[{"award-number":["62076026"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Scientific and Technological Innovation Foundation of Foshan","award":["BK21BF004"],"award-info":[{"award-number":["BK21BF004"]}]},{"name":"Research Project of the Beijing Young Topnotch Talents Cultivation Program","award":["CIT&TCD201904009"],"award-info":[{"award-number":["CIT&TCD201904009"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s00371-023-03107-2","type":"journal-article","created":{"date-parts":[[2023,10,9]],"date-time":"2023-10-09T07:48:01Z","timestamp":1696837681000},"page":"5323-5339","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Point-voxel dual stream transformer for 3d point cloud learning"],"prefix":"10.1007","volume":"40","author":[{"given":"Tianmeng","family":"Zhao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4558-7403","authenticated-orcid":false,"given":"Hui","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Baoqing","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Fan","sequence":"additional","affiliation":[]},{"given":"Chen","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,9]]},"reference":[{"key":"3107_CR1","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: PointNet: deep learning on point sets for 3D classification and segmentation. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 652\u2013660 (2017)."},{"key":"3107_CR2","doi-asserted-by":"crossref","unstructured":"Liu, T., Cai Y., Zheng J., Thalmann N.M., BEACon: a boundary embedded attentional convolution network for point cloud instance segmentation, 38, pp. 2303\u20132313 (2022)","DOI":"10.1007\/s00371-021-02112-7"},{"key":"3107_CR3","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: PointNet++: deep hierarchical feature learning on point sets in a metric space. In: Advances in Neural Information Processing Systems, pp. 5099\u20135108 (2017)."},{"key":"3107_CR4","unstructured":"Li, Y., Bu, R., Sun, M., Wu, W., Di, X., Chen, B.: PointCNN: convolution On X-transformed points. In: Advances in Neural Information Processing Systems, pp. 820\u2013830 (2018)."},{"issue":"5","key":"3107_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3326362","volume":"38","author":"Y Wang","year":"2019","unstructured":"Wang, Y., Sun, Y., Liu, Z., Sarma, S.E., Bronstein, M.M., Solomon, J.M.: Dynamic graph CNN for learning on point clouds. ACM Trans. Graph. 38(5), 1\u201312 (2019)","journal-title":"ACM Trans. Graph."},{"key":"3107_CR6","doi-asserted-by":"crossref","unstructured":"Thomas, H., Qi, C.R., Deschaud, J.E., Marcotegui, B., Goulette, F., Guibas, L.J.: KPConv: Flexible and deformable convolution for point clouds. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6411\u20136420 (2019).","DOI":"10.1109\/ICCV.2019.00651"},{"key":"3107_CR7","unstructured":"Liu, Z., Tang, H., Lin, Y., Han, S.: Point-voxel CNN for efficient 3D deep learning. In: Advances in Neural Information Processing Systems, pp. 1\u201311 (2019)."},{"key":"3107_CR8","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 1\u201311 (2017)."},{"key":"3107_CR9","doi-asserted-by":"crossref","unstructured":"Su, H., Maji, S., Kalogerakis, E., Learned-Miller, E.: Multi-view convolutional neural networks for 3D shape recognition. In: IEEE International Conference on Computer Vision (ICCV), pp. 945\u2013953 (2015).","DOI":"10.1109\/ICCV.2015.114"},{"key":"3107_CR10","doi-asserted-by":"crossref","unstructured":"Kanezaki, A., Matsushita, Y., Nishida, Y.: RotationNet: joint object categorization and pose estimation using multiviews from unsupervised viewpoints. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5010\u20135019 (2018).","DOI":"10.1109\/CVPR.2018.00526"},{"key":"3107_CR11","doi-asserted-by":"crossref","unstructured":"Feng, Y., Zhang, Z., Zhao, X., Ji, R., Gao, Y.: GVCNN: group-view convolutional neural networks for 3D shape recognition. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 264\u2013272 (2018).","DOI":"10.1109\/CVPR.2018.00035"},{"key":"3107_CR12","doi-asserted-by":"crossref","unstructured":"Jiang, J., Bao, D., Chen, Z., Zhao, X., Gao, Y.: MLVCNN: multi-loop-view convolutional neural network for 3D shape retrieval. In: AAAI Conference on Artificial Intelligence (AAAI), pp. 8513\u20138520 (2019).","DOI":"10.1609\/aaai.v33i01.33018513"},{"key":"3107_CR13","doi-asserted-by":"crossref","unstructured":"Hamdi, A., Giancola, S., Ghanem, B.: MVTN: multi-view transformation network for 3D shape recognition. In: IEEE International Conference on Computer Vision (ICCV), pp. 1\u201311 (2021).","DOI":"10.1109\/ICCV48922.2021.00007"},{"key":"3107_CR14","unstructured":"Wu, Z., Song, S., Khosla, A., Yu, F., Zhang, L., Tang, X., Xiao, J.: 3D ShapeNets: a deep representation for volumetric shapes. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1912\u20131920 (2015)."},{"key":"3107_CR15","doi-asserted-by":"crossref","unstructured":"Maturana, D., Scherer, S.: VoxNet: a 3D convolutional neural network for real-time object recognition. In: IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 922\u2013928 (2015).","DOI":"10.1109\/IROS.2015.7353481"},{"key":"3107_CR16","doi-asserted-by":"crossref","unstructured":"\u00c7i\u00e7ek, \u00d6., Abdulkadir, A., Lienkamp, S.S., Brox, T., Ronneberger, O.: 3D U-Net: learning dense volumetric segmentation from sparse annotation. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 424\u2013432 (2016).","DOI":"10.1007\/978-3-319-46723-8_49"},{"key":"3107_CR17","doi-asserted-by":"crossref","unstructured":"Tchapmi, L., Choy, C., Armeni, I., Gwak, J., Savarese, S.: Segcloud: Semantic segmentation of 3d point clouds. In: International Conference on 3D Vision (3DV), pp. 537\u2013547 (2017).","DOI":"10.1109\/3DV.2017.00067"},{"key":"3107_CR18","doi-asserted-by":"crossref","unstructured":"Riegler, G., Osman Ulusoy, A., Geiger, A.: OctNet: learning deep 3D representations at high resolutions. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3577\u20133586 (2017).","DOI":"10.1109\/CVPR.2017.701"},{"key":"3107_CR19","doi-asserted-by":"crossref","unstructured":"Choy, C., Gwak, J., Savarese, S.: 4D Spatio-temporal ConvNets: minkowski convolutional neural networks. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3075\u20133084 (2019).","DOI":"10.1109\/CVPR.2019.00319"},{"key":"3107_CR20","doi-asserted-by":"crossref","unstructured":"Tang, H., Liu, Z., Zhao, S., Lin, Y., Lin, J., Wang, H., Han, S.: Searching efficient 3D architectures with sparse point-voxel convolution. In: European Conference on Computer Vision (ECCV), pp. 685\u2013702 (2020).","DOI":"10.1007\/978-3-030-58604-1_41"},{"key":"3107_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Y., Fan, B., Xiang, S., Pan, C.: Relation-shape convolutional neural network for point cloud analysis. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8895\u20138904 (2019).","DOI":"10.1109\/CVPR.2019.00910"},{"key":"3107_CR22","doi-asserted-by":"crossref","unstructured":"Klokov, R., Lempitsky, V.: Escape from cells: deep Kd-networks for the recognition of 3D point cloud models. In: IEEE International Conference on Computer Vision (ICCV), pp. 863\u2013872 (2017).","DOI":"10.1109\/ICCV.2017.99"},{"issue":"4","key":"3107_CR23","doi-asserted-by":"publisher","first-page":"71:1","DOI":"10.1145\/3197517.3201301","volume":"37","author":"M Atzmon","year":"2018","unstructured":"Atzmon, M., Maron, H., Lipman, Y.: Point convolutional neural networks by extension operators. ACM Trans. Graph. 37(4), 71:1-71:12 (2018)","journal-title":"ACM Trans. Graph."},{"key":"3107_CR24","doi-asserted-by":"crossref","unstructured":"Xu, Y., Fan, T., Xu, M., Zeng, L., Qiao, Y.: SpiderCNN: deep learning on point sets with parameterized convolutional filters. In: European Conference on Computer Vision (ECCV), pp. 87\u2013102 (2018).","DOI":"10.1007\/978-3-030-01237-3_6"},{"key":"3107_CR25","doi-asserted-by":"crossref","unstructured":"Landrieu, L., Simonovsky, M.: Large-scale point cloud semantic segmentation with superpoint graphs. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4558\u20134567 (2018).","DOI":"10.1109\/CVPR.2018.00479"},{"key":"3107_CR26","doi-asserted-by":"crossref","unstructured":"Jiang, L., Zhao, H., Liu, S., Shen, X., Fu, C.W., Jia, J.: Hierarchical point-edge interaction network for point cloud semantic segmentation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10433\u201310441 (2019).","DOI":"10.1109\/ICCV.2019.01053"},{"key":"3107_CR27","doi-asserted-by":"crossref","unstructured":"Li, G., Muller, M., Thabet, A., Ghanem, B.: DeepGCNs: Can GCNs Go As Deep As CNNs?. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 9267\u20139276 (2019).","DOI":"10.1109\/ICCV.2019.00936"},{"key":"3107_CR28","doi-asserted-by":"publisher","first-page":"863","DOI":"10.1007\/s00371-021-02351-8","volume":"39","author":"L Chen","year":"2023","unstructured":"Chen, L., Zhang, Q.: DDGCN: graph convolution network based on direction and distance for point cloud learning. Vis. Comput. 39, 863\u2013873 (2023). https:\/\/doi.org\/10.1007\/s00371-021-02351-8","journal-title":"Vis. Comput."},{"key":"3107_CR29","doi-asserted-by":"publisher","first-page":"2407","DOI":"10.1007\/s00371-020-01892-8","volume":"36","author":"Y Sun","year":"2020","unstructured":"Sun, Y., Miao, Y., Chen, J., et al.: PGCNet: patch graph convolutional network for point cloud segmentation of indoor scenes. Vis. Comput. 36, 2407\u20132418 (2020). https:\/\/doi.org\/10.1007\/s00371-020-01892-8","journal-title":"Vis. Comput."},{"key":"3107_CR30","doi-asserted-by":"crossref","unstructured":"You, H., Feng, Y., Ji, R., Gao, Y.: PVNet: a joint convolutional network of point cloud and multi-view for 3D shape recognition. In: ACM International Conference on Multimedia, pp. 1310\u20131318 (2018).","DOI":"10.1145\/3240508.3240702"},{"key":"3107_CR31","doi-asserted-by":"crossref","unstructured":"You, H., Feng, Y., Zhao, X., Zou, C., Ji, R., Gao, Y.: PVRNet: point-view relation neural network for 3D shape recognition. In: AAAI Conference on Artificial Intelligence (AAAI), pp. 9119\u20139126 (2019).","DOI":"10.1609\/aaai.v33i01.33019119"},{"key":"3107_CR32","doi-asserted-by":"crossref","unstructured":"Le, T., Duan, Y.: PointGrid: a deep network for 3D shape understanding. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9204\u20139214 (2018).","DOI":"10.1109\/CVPR.2018.00959"},{"key":"3107_CR33","doi-asserted-by":"crossref","unstructured":"Shi, S., Guo, C., Jiang, L., Wang, Z., Shi, J., Wang, X., Li, H.: PV-RCNN: Point-voxel feature set abstraction for 3D object detection. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10529\u201310538 (2020).","DOI":"10.1109\/CVPR42600.2020.01054"},{"key":"3107_CR34","doi-asserted-by":"crossref","unstructured":"Noh, J., Lee, S., Ham, B.: HVPR: Hybrid voxel-point representation for single-stage 3D object detection. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14605\u201314614 (2021).","DOI":"10.1109\/CVPR46437.2021.01437"},{"key":"3107_CR35","doi-asserted-by":"crossref","unstructured":"Xu, J., Zhang, R., Dou, J., Zhu, Y., Sun, J., Pu, S.: RPVNet: a deep and efficient range-point-voxel fusion network for LiDAR point cloud segmentation. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 16024\u201316033 (2021).","DOI":"10.1109\/ICCV48922.2021.01572"},{"key":"3107_CR36","doi-asserted-by":"crossref","unstructured":"Hu, H., Zhang, Z., Xie, Z., Lin, S.: Local relation networks for image recognition. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3464\u20133473 (2019).","DOI":"10.1109\/ICCV.2019.00356"},{"key":"3107_CR37","doi-asserted-by":"crossref","unstructured":"Liu, X., Han, Z., Liu, Y.S., Zwicker, M.: Point2Sequence: learning the shape representation of 3D point clouds with an attention-based sequence to sequence network. In: AAAI Conference on Artificial Intelligence (AAAI), pp. 8778\u20138785 (2019).","DOI":"10.1609\/aaai.v33i01.33018778"},{"key":"3107_CR38","doi-asserted-by":"crossref","unstructured":"Yan, X., Zheng, C., Li, Z., Wang, S., Cui, S.: PointASNL: robust point clouds processing using nonlocal neural networks with adaptive sampling. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5589\u20135598 (2020).","DOI":"10.1109\/CVPR42600.2020.00563"},{"key":"3107_CR39","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16\u00d716 words: transformers for image recognition at scale. In: International Conference on Learning Representations (ICLR), pp. 1\u201312 (2021)."},{"key":"3107_CR40","unstructured":"Lee, J., Lee, Y., Kim, J., Kosiorek, A., Choi, S., Teh, Y.W.: Set transformer: a framework for attention-based permutation-invariant neural networks. In: International Conference on Machine Learning (ICML), pp. 3744\u20133753 (2019)."},{"issue":"2","key":"3107_CR41","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/s41095-021-0229-5","volume":"7","author":"MH Guo","year":"2021","unstructured":"Guo, M.H., Cai, J.X., Liu, Z.N., Mu, T.J., Martin, R.R., Hu, S.M.: PCT: point cloud transformer. Comput. Vis. Media 7(2), 187\u2013199 (2021)","journal-title":"Comput. Vis. Media"},{"key":"3107_CR42","doi-asserted-by":"crossref","unstructured":"Zhao, H., Jiang, L., Jia, J., Torr, P.H., Koltun, V.: Point transformer. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 16259\u201316268 (2021).","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"3107_CR43","doi-asserted-by":"publisher","DOI":"10.1007\/s00371-022-02688-8","author":"Y He","year":"2022","unstructured":"He, Y., Xia, G., Feng, H., et al.: PCTP: point cloud transformer pooling block for points set abstraction structure. Vis. Comput. (2022). https:\/\/doi.org\/10.1007\/s00371-022-02688-8","journal-title":"Vis. Comput."},{"issue":"6","key":"3107_CR44","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2980179.2980238","volume":"35","author":"L Yi","year":"2016","unstructured":"Yi, L., Kim, V.G., Ceylan, D., Shen, I.C., Yan, M., Su, H., Lu, C., Huang, Q., Sheffer, A., Guibas, L.: A scalable active framework for region annotation in 3D shape collections. ACM Trans. Graph. 35(6), 1\u201312 (2016)","journal-title":"ACM Trans. Graph."},{"key":"3107_CR45","doi-asserted-by":"crossref","unstructured":"Armeni, I., Sener, O., Zamir, A.R., Jiang, H., Brilakis, I., Fischer, M., Savarese, S.: 3D semantic parsing of large-scale indoor spaces. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1534\u20131543 (2016).","DOI":"10.1109\/CVPR.2016.170"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-03107-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-023-03107-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-03107-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,24]],"date-time":"2024-07-24T13:27:11Z","timestamp":1721827631000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-023-03107-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,9]]},"references-count":45,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["3107"],"URL":"https:\/\/doi.org\/10.1007\/s00371-023-03107-2","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,9]]},"assertion":[{"value":"11 September 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 October 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}