{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:15:58Z","timestamp":1776888958440,"version":"3.51.2"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2023,6,11]],"date-time":"2023-06-11T00:00:00Z","timestamp":1686441600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,11]],"date-time":"2023-06-11T00:00:00Z","timestamp":1686441600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62032011"],"award-info":[{"award-number":["62032011"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61772257"],"award-info":[{"award-number":["61772257"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s00371-023-02907-w","type":"journal-article","created":{"date-parts":[[2023,6,11]],"date-time":"2023-06-11T16:01:32Z","timestamp":1686499292000},"page":"5155-5167","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["MFFNet: multimodal feature fusion network for point cloud semantic segmentation"],"prefix":"10.1007","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2943-0931","authenticated-orcid":false,"given":"Dayong","family":"Ren","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiawei","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengyi","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingqiang","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanwen","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,6,11]]},"reference":[{"issue":"6","key":"2907_CR1","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"7","key":"2907_CR2","doi-asserted-by":"publisher","first-page":"2303","DOI":"10.1007\/s00371-021-02112-7","volume":"38","author":"T Liu","year":"2022","unstructured":"Liu, T., Cai, Y., Zheng, J., Thalmann, N.M.: Beacon: a boundary embedded attentional convolution network for point cloud instance segmentation. Vis. Comput. 38(7), 2303\u20132313 (2022)","journal-title":"Vis. Comput."},{"issue":"10","key":"2907_CR3","doi-asserted-by":"publisher","first-page":"2407","DOI":"10.1007\/s00371-020-01892-8","volume":"36","author":"Y Sun","year":"2020","unstructured":"Sun, Y., Miao, Y., Chen, J., Pajarola, R.: Pgcnet: patch graph convolutional network for point cloud segmentation of indoor scenes. Vis. Comput. 36(10), 2407\u20132418 (2020)","journal-title":"Vis. Comput."},{"issue":"1\u20133","key":"2907_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/0600000079","volume":"12","author":"J Janai","year":"2020","unstructured":"Janai, J., G\u00fcney, F., Behl, A., Geiger, A., et al.: Computer vision for autonomous vehicles: problems, datasets and state of the art. Found. Trends Comput. Graph. Vis. 12(1\u20133), 1\u2013308 (2020)","journal-title":"Found. Trends Comput. Graph. Vis."},{"key":"2907_CR5","doi-asserted-by":"crossref","unstructured":"Yang, F., Li, X., Shen, J.: Nested architecture search for point cloud semantic segmentation. IEEE Trans. Image Process. 32, 2889\u20132418 (2022)","DOI":"10.1109\/TIP.2022.3147983"},{"key":"2907_CR6","doi-asserted-by":"crossref","unstructured":"Yin, J., Zhou, D., Zhang, L., Fang, J., Xu, C.-Z., Shen, J., Wang, W.: Proposalcontrast: Unsupervised pre-training for lidar-based 3d object detection. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXXIX, pp. 17\u201333. Springer (2022)","DOI":"10.1007\/978-3-031-19842-7_2"},{"key":"2907_CR7","doi-asserted-by":"crossref","unstructured":"Yin, J., Fang, J., Zhou, D., Zhang, L., Xu, C.-Z., Shen, J., Wang, W.: Semi-supervised 3d object detection with proficient teachers. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXXVIII, pp. 727\u2013743. Springer (2022)","DOI":"10.1007\/978-3-031-19839-7_42"},{"key":"2907_CR8","doi-asserted-by":"crossref","unstructured":"Jaritz, M., Gu, J., Su, H.: Multi-view pointnet for 3d scene understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops, pp. 0\u20130 (2019)","DOI":"10.1109\/ICCVW.2019.00494"},{"issue":"4","key":"2907_CR9","doi-asserted-by":"publisher","first-page":"90","DOI":"10.3390\/technologies10040090","volume":"10","author":"G Rizzoli","year":"2022","unstructured":"Rizzoli, G., Barbato, F., Zanuttigh, P.: Multimodal semantic segmentation in autonomous driving: a review of current approaches and future perspectives. Technologies 10(4), 90 (2022)","journal-title":"Technologies"},{"key":"2907_CR10","doi-asserted-by":"crossref","unstructured":"Meng, Q., Wang, W., Zhou, T., Shen, J., Jia, Y., Van\u00a0Gool, L.: Towards a weakly supervised framework for 3d point cloud object detection and annotation. IEEE Trans. Pattern Anal. Mach. Intell. 44(8), 4454\u20134468 (2022)","DOI":"10.1109\/TPAMI.2021.3063611"},{"key":"2907_CR11","unstructured":"Qian, G., Li, Y., Peng, H., Mai, J., Hammoud, H.A.A.K., Elhoseiny, M., Ghanem, B.: Pointnext: Revisiting pointnet++ with improved training and scaling strategies (2022). arXiv preprint arXiv:2206.04670"},{"key":"2907_CR12","doi-asserted-by":"crossref","unstructured":"Wu, W., Qi, Z., Li, F.: Pointconv: Deep convolutional networks on 3d point clouds. In: Proceedings of the IEEE Conferenceon Computer Vision and Pattern Recognition, pp. 9621\u20139630 (2019)","DOI":"10.1109\/CVPR.2019.00985"},{"key":"2907_CR13","doi-asserted-by":"crossref","unstructured":"You, H., Feng, Y., Ji, R., Gao, Y.: Pvnet: A joint convolutional network of point cloud and multi-view for 3d shape recognition. In: MM, pp. 1310\u20131318 (2018)","DOI":"10.1145\/3240508.3240702"},{"issue":"2","key":"2907_CR14","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","volume":"41","author":"T Baltrusaitis","year":"2019","unstructured":"Baltrusaitis, T., Ahuja, C., Morency, L.: Multimodal machine learning: a survey and taxonomy. IEEE Trans. Pattern Anal. Mach. Intell. 41(2), 423\u2013443 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2907_CR15","doi-asserted-by":"crossref","unstructured":"Chen, X., Ma, H., Wan, J., Li, B., Xia, T.: Multi-view 3d object detection network for autonomous driving. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1907\u20131915 (2017)","DOI":"10.1109\/CVPR.2017.691"},{"key":"2907_CR16","unstructured":"Wang, J., Wei, Z., Zhang, T., Zeng, W.: Deeply-fused nets (2016). CoRR arXiv:1605.07716"},{"key":"2907_CR17","unstructured":"Larsson, G., Maire, M., Shakhnarovich, G.: Fractalnet: Ultra-deep neural networks without residuals. In: ICLR (2017)"},{"key":"2907_CR18","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: Pointnet: Deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 77\u201385 (2017)"},{"key":"2907_CR19","unstructured":"Wu, Z., Song, S., Khosla, A., Yu, F., Zhang, L., Tang, X., Xiao, J.: 3d shapenets: A deep representation for volumetric shapes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1912\u20131920"},{"key":"2907_CR20","doi-asserted-by":"crossref","unstructured":"Su, H., Maji, S., Kalogerakis, E., Learned-Miller, E.: Multi-view convolutional neural networks for 3d shape recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 945\u2013953 (2015)","DOI":"10.1109\/ICCV.2015.114"},{"key":"2907_CR21","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: Pointnet++: Deep hierarchical feature learning on point sets in a metric space. In: Advances in Neural Information Processing Systems, pp. 5105\u20135114 (2017)"},{"issue":"9","key":"2907_CR22","doi-asserted-by":"publisher","first-page":"192104","DOI":"10.1007\/s11432-021-3387-7","volume":"65","author":"D Ren","year":"2022","unstructured":"Ren, D., Wu, Z., Li, J., Yu, P., Guo, J., Wei, M., Guo, Y.: Point attention network for point cloud semantic segmentation. Sci. China Inf. Sci. 65(9), 192104 (2022)","journal-title":"Sci. China Inf. Sci."},{"key":"2907_CR23","doi-asserted-by":"publisher","unstructured":"Yin, J., Shen, J., Gao, X., Crandall, D., Yang, R.: Graph neural network and spatiotemporal transformer attention for 3d video object detection from point clouds. IEEE Trans. Pattern Anal. Mach. Intell. (2021). https:\/\/doi.org\/10.1109\/TPAMI.2021.3125981","DOI":"10.1109\/TPAMI.2021.3125981"},{"key":"2907_CR24","doi-asserted-by":"publisher","unstructured":"Cao, J., Qin, X., Zhao, S., Shen, J.: Bilateral cross-modality graph matching attention for feature fusion in visual question answering. IEEE Trans. Neural Netw. Learn. Syst. (2022). https:\/\/doi.org\/10.1109\/TNNLS.2021.3135655","DOI":"10.1109\/TNNLS.2021.3135655"},{"key":"2907_CR25","doi-asserted-by":"crossref","unstructured":"Giering, M., Venugopalan, V., Reddy, K.K.: Multi-modal sensor registration for vehicle perception via deep neural networks. In: HPEC, pp. 1\u20136 (2015)","DOI":"10.1109\/HPEC.2015.7322485"},{"key":"2907_CR26","doi-asserted-by":"publisher","first-page":"9387","DOI":"10.1109\/TIP.2020.2998275","volume":"29","author":"M Ye","year":"2020","unstructured":"Ye, M., Lan, X., Leng, Q., Shen, J.: Cross-modality person re-identification via modality-aware collaborative ensemble learning. IEEE Trans. Image Process. 29, 9387\u20139399 (2020)","journal-title":"IEEE Trans. Image Process."},{"issue":"9","key":"2907_CR27","doi-asserted-by":"publisher","first-page":"2772","DOI":"10.1109\/TMI.2020.2975344","volume":"39","author":"T Zhou","year":"2020","unstructured":"Zhou, T., Fu, H., Chen, G., Shen, J., Shao, L.: Hi-net: hybrid-fusion network for multi-modal mr image synthesis. IEEE Trans. Med. Imaging 39(9), 2772\u20132781 (2020)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"2907_CR28","doi-asserted-by":"crossref","unstructured":"Shi, S., Wang, X., Li, H.: Pointrcnn: 3d object proposal generation and detection from point cloud. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013779 (2019)","DOI":"10.1109\/CVPR.2019.00086"},{"key":"2907_CR29","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition (2014). arXiv preprint arXiv:1409.1556"},{"key":"2907_CR30","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Hua, B., Yeung, S.: Shellnet: Efficient point cloud convolutional neural networks using concentric shells statistics. In: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), October 27 - November 2, 2019, pp. 1607\u20131616","DOI":"10.1109\/ICCV.2019.00169"},{"key":"2907_CR31","unstructured":"Li, Y., Bu, R., Sun, M., Wu, W., Di, X., Chen, B.: Pointcnn: Convolution on x-transformed points. In: Advances in Neural Information Processing Systems, pp. 828\u2013838"},{"key":"2907_CR32","unstructured":"Tosteberg, P.: Semantic segmentation of point clouds using deep learning. Master of Science Thesis in Electrical Engineering Department of Electrical Engineering, Link\u00f6ping University (2017)"},{"key":"2907_CR33","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30, 5998\u20136008 (2017)"},{"key":"2907_CR34","unstructured":"Armeni, I., Sax, S., Zamir, A.R., Savarese, S.: Joint 2d-3d-semantic data for indoor scene understanding (2017). arXiv preprint arXiv:1702.01105"},{"key":"2907_CR35","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., Nie\u00dfner, M.: Scannet: Richly-annotated 3d reconstructions of indoor scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5828\u20135839 (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"2907_CR36","doi-asserted-by":"crossref","unstructured":"Zhu, X., Zhou, H., Wang, T., Hong, F., Ma, Y., Li, W., Li, H., Lin, D.: Cylindrical and asymmetrical 3d convolution networks for lidar segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9939\u20139948 (2021)","DOI":"10.1109\/CVPR46437.2021.00981"},{"key":"2907_CR37","doi-asserted-by":"crossref","unstructured":"Armeni, I., Sener, O., Zamir, A.R., Jiang, H., Brilakis, I.K., Fischer, M., Savarese, S.: 3d semantic parsing of large-scale indoor spaces. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition,, pp. 1534\u20131543 (2016)","DOI":"10.1109\/CVPR.2016.170"},{"key":"2907_CR38","doi-asserted-by":"crossref","unstructured":"Behley, J., Garbade, M., Milioto, A., Quenzel, J., Behnke, S., Stachniss, C., Gall, J.: Semantickitti: A dataset for semantic scene understanding of lidar sequences. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9297\u20139307 (2019)","DOI":"10.1109\/ICCV.2019.00939"},{"key":"2907_CR39","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2907_CR40","unstructured":"Howard, A.G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., Andreetto, M., Adam, H.: Mobilenets: Efficient convolutional neural networks for mobile vision applications (2017). arXiv preprint arXiv:1704.04861"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-02907-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-023-02907-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-023-02907-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,24]],"date-time":"2024-07-24T13:24:58Z","timestamp":1721827498000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-023-02907-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,11]]},"references-count":40,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["2907"],"URL":"https:\/\/doi.org\/10.1007\/s00371-023-02907-w","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6,11]]},"assertion":[{"value":"15 May 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 June 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there is no conflict of interests regarding the publication of this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}