{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T05:45:58Z","timestamp":1757310358174,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":30,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819985425"},{"type":"electronic","value":"9789819985432"}],"license":[{"start":{"date-parts":[[2023,12,29]],"date-time":"2023-12-29T00:00:00Z","timestamp":1703808000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,29]],"date-time":"2023-12-29T00:00:00Z","timestamp":1703808000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8543-2_11","type":"book-chapter","created":{"date-parts":[[2023,12,28]],"date-time":"2023-12-28T10:03:03Z","timestamp":1703757783000},"page":"130-141","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["TSTD:A Cross-modal Two Stages Network with\u00a0New Trans-decoder for\u00a0Point Cloud Semantic Segmentation"],"prefix":"10.1007","author":[{"given":"Zhao","family":"Gao","sequence":"first","affiliation":[]},{"given":"Li","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Hong","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Pengcheng","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,29]]},"reference":[{"key":"11_CR1","doi-asserted-by":"crossref","unstructured":"Chiang, H.Y., et\u00a0al.: A unified point-based framework for 3d segmentation. In: 2019 International Conference on 3D Vision (3DV), pp. 155\u2013163. IEEE (2019)","DOI":"10.1109\/3DV.2019.00026"},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Choy, C., et\u00a0al.: 4d spatio-temporal convnets: minkowski convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3075\u20133084 (2019)","DOI":"10.1109\/CVPR.2019.00319"},{"key":"11_CR3","doi-asserted-by":"crossref","unstructured":"Dai, A., et\u00a0al.: Scannet: richly-annotated 3d reconstructions of indoor scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5828\u20135839 (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"11_CR4","doi-asserted-by":"crossref","unstructured":"Dai, A., Nie\u00dfner, M.: 3dmv: joint 3d-multi-view prediction for 3d semantic scene segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 452\u2013468 (2018)","DOI":"10.1007\/978-3-030-01249-6_28"},{"key":"11_CR5","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"11_CR6","doi-asserted-by":"crossref","unstructured":"Graham, B., et\u00a0al.: 3d semantic segmentation with submanifold sparse convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 9224\u20139232 (2018)","DOI":"10.1109\/CVPR.2018.00961"},{"key":"11_CR7","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/s41095-021-0229-5","volume":"7","author":"MH Guo","year":"2021","unstructured":"Guo, M.H., et al.: Pct: point cloud transformer. Comput. Visual Media 7, 187\u2013199 (2021)","journal-title":"Comput. Visual Media"},{"key":"11_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-319-54181-5_14","volume-title":"Computer Vision \u2013 ACCV 2016","author":"C Hazirbas","year":"2017","unstructured":"Hazirbas, C., Ma, L., Domokos, C., Cremers, D.: FuseNet: incorporating depth into semantic segmentation via fusion-based CNN architecture. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016. LNCS, vol. 10111, pp. 213\u2013228. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54181-5_14"},{"key":"11_CR9","doi-asserted-by":"crossref","unstructured":"Hu, Q., et\u00a0al.: Randla-net: efficient semantic segmentation of large-scale point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11108\u201311117 (2020)","DOI":"10.1109\/CVPR42600.2020.01112"},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Lei, H., et\u00a0al.: Seggcn: efficient 3d point cloud segmentation with fuzzy spherical kernel. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11611\u201311620 (2020)","DOI":"10.1109\/CVPR42600.2020.01163"},{"key":"11_CR11","unstructured":"Li, Y., et\u00a0al.: Pointcnn: convolution on x-transformed points. In: Advances in neural information processing systems 31 (2018)"},{"key":"11_CR12","doi-asserted-by":"crossref","unstructured":"Liu, Z., et\u00a0al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"issue":"2","key":"11_CR13","doi-asserted-by":"publisher","first-page":"1332","DOI":"10.1109\/LRA.2021.3138539","volume":"7","author":"D Menini","year":"2021","unstructured":"Menini, D., Kumar, S., et al.: A real-time online learning framework for joint 3d reconstruction and semantic segmentation of indoor scenes. IEEE Robot. Autom. Lett. 7(2), 1332\u20131339 (2021)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"11_CR14","doi-asserted-by":"crossref","unstructured":"Narita, G., et\u00a0al.: Panopticfusion: online volumetric semantic mapping at the level of stuff and things. In: 2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 4205\u20134212. IEEE (2019)","DOI":"10.1109\/IROS40897.2019.8967890"},{"key":"11_CR15","unstructured":"Qi, C.R., et\u00a0al.: Pointnet: deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 652\u2013660 (2017)"},{"key":"11_CR16","unstructured":"Qi, C.R., et\u00a0al.: Pointnet++: deep hierarchical feature learning on point sets in a metric space. In: Advances in Neural Information Processing Systems 30 (2017)"},{"issue":"8","key":"11_CR17","doi-asserted-by":"publisher","first-page":"11836","DOI":"10.1109\/TITS.2021.3107672","volume":"23","author":"Y Qian","year":"2021","unstructured":"Qian, Y., Deng, L., et al.: Gated-residual block for semantic segmentation using rgb-d data. IEEE Trans. Intell. Transp. Syst. 23(8), 11836\u201311844 (2021)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"11_CR18","doi-asserted-by":"publisher","unstructured":"Shi, B., et\u00a0al.: A transformer-based decoder for semantic segmentation with multi-level context mining. In: ECCV 2022, Part XXVIII, pp. 624\u2013639. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19815-1_36","DOI":"10.1007\/978-3-031-19815-1_36"},{"issue":"1","key":"11_CR19","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1109\/TCSVT.2021.3056726","volume":"32","author":"W Shi","year":"2021","unstructured":"Shi, W., Xu, J., et al.: Rgb-d semantic segmentation and label-oriented voxelgrid fusion for accurate 3d semantic mapping. IEEE Trans. Circuits Syst. Video Technol. 32(1), 183\u2013197 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"3","key":"11_CR20","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1109\/TMRB.2020.3009527","volume":"2","author":"W Shi","year":"2020","unstructured":"Shi, W., Zhu, D., et al.: Multilevel cross-aware rgbd indoor semantic segmentation for bionic binocular robot. IEEE Trans. Med. Robot. Bionics 2(3), 382\u2013390 (2020)","journal-title":"IEEE Trans. Med. Robot. Bionics"},{"key":"11_CR21","doi-asserted-by":"crossref","unstructured":"Thomas, H., et\u00a0al.: Kpconv: flexible and deformable convolution for point clouds. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6411\u20136420 (2019)","DOI":"10.1109\/ICCV.2019.00651"},{"issue":"5","key":"11_CR22","doi-asserted-by":"publisher","first-page":"1239","DOI":"10.1007\/s11263-019-01188-y","volume":"128","author":"A Valada","year":"2020","unstructured":"Valada, A., et al.: Self-supervised model adaptation for multimodal semantic segmentation. Int. J. Comput. Vision 128(5), 1239\u20131285 (2020)","journal-title":"Int. J. Comput. Vision"},{"key":"11_CR23","doi-asserted-by":"crossref","unstructured":"Wang, J., Sun, B., Lu, Y.: Mvpnet: multi-view point regression networks for 3d object reconstruction from a single image. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 8949\u20138956 (2019)","DOI":"10.1609\/aaai.v33i01.33018949"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Wu, W., et\u00a0al.: Pointconv: deep convolutional networks on 3d point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9621\u20139630 (2019)","DOI":"10.1109\/CVPR.2019.00985"},{"key":"11_CR25","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., et al.: Segformer: simple and efficient design for semantic segmentation with transformers. Adv. Neural. Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"5","key":"11_CR26","doi-asserted-by":"publisher","first-page":"1294","DOI":"10.3390\/rs14051294","volume":"14","author":"L Yan","year":"2022","unstructured":"Yan, L., et al.: Efficient depth fusion transformer for aerial image semantic segmentation. Remote Sens. 14(5), 1294 (2022)","journal-title":"Remote Sens."},{"key":"11_CR27","doi-asserted-by":"crossref","unstructured":"Yan, X., et\u00a0al.: Pointasnl: robust point clouds processing using nonlocal neural networks with adaptive sampling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5589\u20135598 (2020)","DOI":"10.1109\/CVPR42600.2020.00563"},{"key":"11_CR28","unstructured":"Zhang, J., et\u00a0al.: Cmx: cross-modal fusion for rgb-x semantic segmentation with transformers. arXiv preprint arXiv:2203.04838 (2022)"},{"key":"11_CR29","doi-asserted-by":"crossref","unstructured":"Zhao, H., et\u00a0al.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017)","DOI":"10.1109\/CVPR.2017.660"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Zhao, H., et\u00a0al.: Point transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16259\u201316268 (2021)","DOI":"10.1109\/ICCV48922.2021.01595"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8543-2_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,28]],"date-time":"2023-12-28T10:05:00Z","timestamp":1703757900000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8543-2_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,29]]},"ISBN":["9789819985425","9789819985432"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8543-2_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,12,29]]},"assertion":[{"value":"29 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}