{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:52:41Z","timestamp":1777567961827,"version":"3.51.4"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,7,27]],"date-time":"2024-07-27T00:00:00Z","timestamp":1722038400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,7,27]],"date-time":"2024-07-27T00:00:00Z","timestamp":1722038400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s11263-024-02162-z","type":"journal-article","created":{"date-parts":[[2024,7,27]],"date-time":"2024-07-27T12:01:55Z","timestamp":1722081715000},"page":"275-290","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Position-Guided Point Cloud Panoptic Segmentation Transformer"],"prefix":"10.1007","volume":"133","author":[{"given":"Zeqi","family":"Xiao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenwei","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tai","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen Change","family":"Loy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dahua","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiangmiao","family":"Pang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,7,27]]},"reference":[{"issue":"4","key":"2162_CR1","doi-asserted-by":"publisher","first-page":"5432","DOI":"10.1109\/LRA.2020.3007440","volume":"5","author":"I Alonso","year":"2020","unstructured":"Alonso, I., et al. (2020). 3D-mininet: Learning a 2D representation from point clouds for fast and efficient 3d lidar semantic segmentation. IEEE Robotics and Automation Letters, 5(4), 5432\u20135439.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"2162_CR2","doi-asserted-by":"crossref","unstructured":"Behley, J., et al. (2019) Semantickitti: A dataset for semantic scene understanding of lidar sequences. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 9297\u2013 9307).","DOI":"10.1109\/ICCV.2019.00939"},{"key":"2162_CR3","doi-asserted-by":"crossref","unstructured":"Behley, J., Milioto, A., & Stachniss, C. (2021) A benchmark for LiDAR-based panoptic segmentation based on KITTI. In 2021 IEEE international conference on robotics and automation (ICRA) (pp. 13596\u201313603). IEEE.","DOI":"10.1109\/ICRA48506.2021.9561476"},{"key":"2162_CR4","doi-asserted-by":"crossref","unstructured":"Bichen, Wu., et al. (2018). Squeezeseg: Convolutional neural nets with recurrent CRF for real-time road-object segmentation from 3D lidar point cloud. In IEEE international conference on robotics and automation (ICRA) (pp. 1887\u20131893). IEEE.","DOI":"10.1109\/ICRA.2018.8462926"},{"key":"2162_CR5","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al. (2020) nuscenes: A multimodal dataset for autonomous driving. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 11621\u201311631).","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"2162_CR6","doi-asserted-by":"crossref","unstructured":"Carion, N., et al. (2020). End-to-end object detection with transformers. In Computer Vision- ECCV 2020: 16th European Conference Glasgow, UK, August 23\u201328, 2020, Proceedings, Part I 16. (pp. 213-229). Springer.","DOI":"10.1007\/978-3-030-58452-8_13"},{"issue":"4","key":"2162_CR7","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen, L.-C., et al. (2017). Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFS. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(4), 834\u2013848.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2162_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, B., et al. (2020). Panoptic-deeplab: A simple, strong, and fast baseline for bottom-up panoptic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 12475\u201312485).","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"2162_CR9","doi-asserted-by":"crossref","unstructured":"Cheng, R., et al. (2021). 2-s3net: Attentive feature fusion with adaptive feature selection for sparse semantic segmentation network. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 12547\u201312556).","DOI":"10.1109\/CVPR46437.2021.01236"},{"key":"2162_CR10","doi-asserted-by":"crossref","unstructured":"Cheng, B., et al. (2021). Masked-attention mask transformer for universal image segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 1290\u20131299).","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"2162_CR11","first-page":"17864","volume":"34","author":"B Cheng","year":"2021","unstructured":"Cheng, B., Schwing, A., & Kirillov, A. (2021). Per-pixel classification is not all you need for semantic segmentation. Advances in Neural Information Processing Systems, 34, 17864\u201317875.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2162_CR12","doi-asserted-by":"crossref","unstructured":"Engelmann, F., et al. (2020). 3d-mpa: Multiproposal aggregation for 3d semantic instancesegmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9031\u20139040).","DOI":"10.1109\/CVPR42600.2020.00905"},{"key":"2162_CR13","doi-asserted-by":"crossref","unstructured":"Fan, L., et al. (2022). Embracing single stride 3D object detector with sparse transformer. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8458\u20138468).","DOI":"10.1109\/CVPR52688.2022.00827"},{"issue":"2","key":"2162_CR14","doi-asserted-by":"publisher","first-page":"3795","DOI":"10.1109\/LRA.2022.3148457","volume":"7","author":"WK Fong","year":"2022","unstructured":"Fong, W. K., et al. (2022). Panoptic nuscenes: A large-scale benchmark for lidar panoptic segmentation and tracking. IEEE Robotics and Automation Letters, 7(2), 3795\u20133802.","journal-title":"IEEE Robotics and Automation Letters"},{"issue":"2","key":"2162_CR15","doi-asserted-by":"publisher","first-page":"3216","DOI":"10.1109\/LRA.2021.3060405","volume":"6","author":"S Gasperini","year":"2021","unstructured":"Gasperini, S., et al. (2021). Panoster: End-to-end panoptic segmentation of lidar point clouds. IEEE Robotics and Automation Letters, 6(2), 3216\u20133223.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"2162_CR16","doi-asserted-by":"crossref","unstructured":"Graham, B., Engelcke, M., & Van Der Maaten, L., . (2018). 3d semantic segmentation with submanifold sparse convolutional networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 9224\u20139232).","DOI":"10.1109\/CVPR.2018.00961"},{"key":"2162_CR17","doi-asserted-by":"crossref","unstructured":"Han, L., et al. (2020). Occuseg: Occupancy-aware 3D instance segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 2940\u20132949).","DOI":"10.1109\/CVPR42600.2020.00301"},{"key":"2162_CR18","doi-asserted-by":"crossref","unstructured":"Hong, F., et al. (2021). Lidar-based panoptic segmentation via dynamic shifting network. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13090\u201313099).","DOI":"10.1109\/CVPR46437.2021.01289"},{"key":"2162_CR19","doi-asserted-by":"crossref","unstructured":"Hou, Y., et al. (2022). Point-to-voxel knowledge distillation for lidar semantic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8479\u20138488).","DOI":"10.1109\/CVPR52688.2022.00829"},{"key":"2162_CR20","doi-asserted-by":"crossref","unstructured":"Hu, Q., et al. (2020). Randla-net: Efficient semantic segmentation of large-scale point clouds. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 11108\u201311117).","DOI":"10.1109\/CVPR42600.2020.01112"},{"key":"2162_CR21","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al. (2019). Panoptic feature pyramid networks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 6399\u20136408).","DOI":"10.1109\/CVPR.2019.00656"},{"key":"2162_CR22","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al. (2019). Panoptic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9404\u20139413).","DOI":"10.1109\/CVPR.2019.00963"},{"key":"2162_CR23","doi-asserted-by":"crossref","unstructured":"Lang, AH., et al. (2019). Pointpillars: Fast encoders for object detection from point clouds. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 12697\u201312705).","DOI":"10.1109\/CVPR.2019.01298"},{"key":"2162_CR24","doi-asserted-by":"crossref","unstructured":"Li, J., et al. (2022). Panoptic-PHNet: Towards real-time and high-precision LiDAR panoptic segmentation via clustering pseudo heatmap. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 11809\u20131118).","DOI":"10.1109\/CVPR52688.2022.01151"},{"key":"2162_CR25","doi-asserted-by":"crossref","unstructured":"Li, Q., Qi, X., & Torr, PHS. (2020). Unifying training and inference for panoptic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13320\u201313328).","DOI":"10.1109\/CVPR42600.2020.01333"},{"key":"2162_CR26","doi-asserted-by":"crossref","unstructured":"Lin, T-Yi., et al. (2014 ). Microsoft coco: Common objects in context. In Computer vision-ECCV 2014: 13th European conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13. (pp. 740\u2013755). Springer.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2162_CR27","doi-asserted-by":"crossref","unstructured":"Lin, T-Yi., et al. (2017). Focal loss for dense object detection. In Proceedings of the IEEE international conference on computer vision (pp. 2980\u20132988).","DOI":"10.1109\/ICCV.2017.324"},{"key":"2162_CR28","unstructured":"Loshchilov, I., & Hutter, F. (2017). Decoupled weight decay regularization. arXiv Preprint retrieved from arXiv:1711.05101"},{"key":"2162_CR29","doi-asserted-by":"crossref","unstructured":"Lyu, Y., Huang, X., & Zhang, Z. (2020). Learning to segment 3D point clouds in 2D image space. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 12255\u201312264).","DOI":"10.1109\/CVPR42600.2020.01227"},{"key":"2162_CR30","doi-asserted-by":"crossref","unstructured":"Mao, J., Wang, X., & Li, H. (2019). Interpolated convolutional networks for 3D point cloud understanding. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 1578\u20131587).","DOI":"10.1109\/ICCV.2019.00166"},{"issue":"2","key":"2162_CR31","doi-asserted-by":"publisher","first-page":"1141","DOI":"10.1109\/LRA.2023.3236568","volume":"8","author":"R Marcuzzi","year":"2023","unstructured":"Marcuzzi, R., et al. (2023). Mask-based panoptic lidar segmentation for autonomous driving. IEEE Robotics and Automation Letters, 8(2), 1141\u20131148.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"2162_CR32","doi-asserted-by":"crossref","unstructured":"Meng, H-Yu., et al. (2019). Vv-net: Voxel vae net with group convolutions for point cloud segmentation. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 8500\u20138508).","DOI":"10.1109\/ICCV.2019.00859"},{"key":"2162_CR33","doi-asserted-by":"crossref","unstructured":"Milioto, A., et al. (2019). Rangenet++: Fast and accurate lidar semantic segmentation. In IEEE\/RSJ international conference on intelligent robots and systems (IROS) (pp. 4213\u20134220). IEEE.","DOI":"10.1109\/IROS40897.2019.8967762"},{"key":"2162_CR34","doi-asserted-by":"crossref","unstructured":"Milioto, A., et al. (2020). Lidar panoptic segmentation for autonomous driving. In 2020IEEE\/RSJ international conference on intelligent robots and systems (IROS) (pp. 8505\u20138512). IEEE.","DOI":"10.1109\/IROS45743.2020.9340837"},{"key":"2162_CR35","unstructured":"MMDetection3D Contributors. (2020). MMDetection3D: OpenMMLab next-generation platform for general 3D object detection. https:\/\/github.com\/openmmlab\/mmdetection3d"},{"key":"2162_CR36","doi-asserted-by":"crossref","unstructured":"Porzi, L., et al. (2019). Seamless scene segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8277\u20138286).","DOI":"10.1109\/CVPR.2019.00847"},{"key":"2162_CR37","unstructured":"Qi, CR., et al. (2017). Pointnet: Deep learning on point sets for 3D classification and segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 652\u2013660)."},{"key":"2162_CR38","unstructured":"Qi, RC., et al. (2017). Pointnet++: Deep hierarchical feature learning on point sets in a metric space. Advances in Neural Information Processing Systems, 30"},{"key":"2162_CR39","doi-asserted-by":"crossref","unstructured":"Razani, R., et al. (2021). GP-S3Net: Graph-based panoptic sparse semantic segmentation network. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 16076\u201316085).","DOI":"10.1109\/ICCV48922.2021.01577"},{"issue":"3","key":"2162_CR40","doi-asserted-by":"publisher","first-page":"1894","DOI":"10.1109\/TRO.2021.3122069","volume":"38","author":"K Sirohi","year":"2021","unstructured":"Sirohi, K., et al. (2021). Efficientlps: Efficient lidar panoptic segmentation. IEEE Transactions on Robotics, 38(3), 1894\u20131914.","journal-title":"IEEE Transactions on Robotics"},{"key":"2162_CR41","doi-asserted-by":"crossref","unstructured":"Su, S., et al. (2023). PUPS: Point cloud unified panoptic segmentation. arXiv Preprint retrieved from arXiv:2302.06185","DOI":"10.1609\/aaai.v37i2.25329"},{"key":"2162_CR42","unstructured":"Sudre, CH. et al. (2017). Generalised dice overlap as a deep learning loss function for highly unbalanced segmentations. In Deep learning in medical image analysis and multimodal learning for clinical decision support: Third international workshop, DLMIA 2017, and 7th international workshop, ML-CDS 2017, Held in Conjunction with MICCAI 2017, Qu\u00e9bec City, QC, Canada, September 14, Proceedings 3. (pp. 240\u2013248). Springer."},{"key":"2162_CR43","doi-asserted-by":"crossref","unstructured":"Tang, H., et al. (2020). Searching efficient 3D architectures with sparse point-voxel convolution. In Computer vision-ECCV 2020: 16th European conference Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXVIII. (pp. 685\u2013702). Springer.","DOI":"10.1007\/978-3-030-58604-1_41"},{"key":"2162_CR44","doi-asserted-by":"crossref","unstructured":"Thomas, H., et al. (2019). Kpconv: Flexible and deformable convolution for point clouds. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 6411\u20136420).","DOI":"10.1109\/ICCV.2019.00651"},{"key":"2162_CR45","first-page":"17721","volume":"33","author":"X Wang","year":"2020","unstructured":"Wang, X., et al. (2020). Solov2: Dynamic and fast instance segmentation. Advances in Neural Information Processing Systems, 33, 17721\u201317732.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2162_CR46","doi-asserted-by":"crossref","unstructured":"Wu, W., Qi, Z., & Fuxin, L. (2019). Pointconv: Deep convolutional networks on 3D point clouds. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9621\u20139630).","DOI":"10.1109\/CVPR.2019.00985"},{"key":"2162_CR47","doi-asserted-by":"crossref","unstructured":"Xiong, Y., et al. (2019). Upsnet: A unified panoptic segmentation network. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 8818\u20138826).","DOI":"10.1109\/CVPR.2019.00902"},{"key":"2162_CR48","doi-asserted-by":"crossref","unstructured":"Xu,J., et al. (2021). Rpvnet: A deep and efficient range-point-voxel fusion network for lidar point cloud segmentation. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 16024\u201316033).","DOI":"10.1109\/ICCV48922.2021.01572"},{"issue":"3","key":"2162_CR49","doi-asserted-by":"publisher","first-page":"2920","DOI":"10.1609\/aaai.v36i3.20197","volume":"36","author":"S Xu","year":"2022","unstructured":"Xu, S., et al. (2022). Sparse cross-scale attention network for efficient lidar panoptic segmentation. Proceedings of the AAAI Conference on Artificial Intelligence, 36(3), 2920\u20132928.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"2162_CR50","first-page":"10326","volume":"34","author":"W Zhang","year":"2021","unstructured":"Zhang, W., et al. (2021). K-net: Towards unified image segmentation. Advances in Neural Information Processing Systems, 34, 10326\u201310338.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2162_CR51","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Zhang, Y., Foroosh, H. (2021) Panoptic-polarnet: Proposal-free lidar point cloud panoptic segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13194\u201313203).","DOI":"10.1109\/CVPR46437.2021.01299"},{"key":"2162_CR52","doi-asserted-by":"crossref","unstructured":"Zhu, X., et al. (2021). Cylindrical and asymmetrical 3D convolution networks for lidar segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9939\u20139948).","DOI":"10.1109\/CVPR46437.2021.00981"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02162-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02162-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02162-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,7]],"date-time":"2025-01-07T06:12:19Z","timestamp":1736230339000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02162-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,27]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["2162"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02162-z","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,7,27]]},"assertion":[{"value":"7 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 June 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 July 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}