{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T07:59:50Z","timestamp":1770796790962,"version":"3.50.0"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T00:00:00Z","timestamp":1764806400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T00:00:00Z","timestamp":1764806400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"The National Nature Science Foundation of China","doi-asserted-by":"crossref","award":["No.61471206"],"award-info":[{"award-number":["No.61471206"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"The Natural Science Foundation of Jiangsu province","award":["No. BK20180088"],"award-info":[{"award-number":["No. BK20180088"]}]},{"name":"Postgraduate Research & Practice Innovation Program of Jiangsu Province","award":["No. SJCX24_0290"],"award-info":[{"award-number":["No. SJCX24_0290"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s00530-025-02068-x","type":"journal-article","created":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T07:10:39Z","timestamp":1764832239000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["3D semantic segmentation for railway scenes via heterogeneous multimodal alignment and distillation"],"prefix":"10.1007","volume":"32","author":[{"given":"Ning","family":"Sun","sequence":"first","affiliation":[]},{"given":"Yuchen","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Maomao","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Jixin","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Chai","sequence":"additional","affiliation":[]},{"given":"Cong","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,4]]},"reference":[{"issue":"4","key":"2068_CR1","doi-asserted-by":"publisher","first-page":"475","DOI":"10.3390\/app8040475","volume":"8","author":"M L\u00fcy","year":"2018","unstructured":"L\u00fcy, M., \u00c7am, E., Ulam\u0131\u015f, F., Uzun, I., Ak\u0131n, S.I.: Initial results of testing a multilayer laser scanner in a collision avoidance system for light rail vehicles. Appl. Sci. 8(4), 475 (2018)","journal-title":"Appl. Sci."},{"key":"2068_CR2","doi-asserted-by":"crossref","unstructured":"Gao, H., Huang, Y., Li, H., Zhang, Q.: Multi-sensor fusion perception system in train. In: 2021 IEEE 10th Data Driven Control and Learning Systems Conference (DDCLS), pp. 1171\u20131176 (2021). IEEE","DOI":"10.1109\/DDCLS52934.2021.9455542"},{"key":"2068_CR3","doi-asserted-by":"crossref","unstructured":"Ku, J., Mozifian, M., Lee, J., Harakeh, A., Waslander, S.L.: Joint 3d proposal generation and object detection from view aggregation. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 1\u20138 (2018). IEEE","DOI":"10.1109\/IROS.2018.8594049"},{"key":"2068_CR4","doi-asserted-by":"crossref","unstructured":"Lu, H., Chen, X., Zhang, G., Zhou, Q., Ma, Y., Zhao, Y.: Scanet: Spatial-channel attention network for 3d object detection. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1992\u20131996 (2019). IEEE","DOI":"10.1109\/ICASSP.2019.8682746"},{"key":"2068_CR5","doi-asserted-by":"crossref","unstructured":"Liang, M., Yang, B., Wang, S., Urtasun, R.: Deep continuous fusion for multi-sensor 3d object detection. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 641\u2013656 (2018)","DOI":"10.1007\/978-3-030-01270-0_39"},{"key":"2068_CR6","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Zhou, Y., Tuzel, O.: Mvx-net: Multimodal voxelnet for 3d object detection. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 7276\u20137282 (2019). IEEE","DOI":"10.1109\/ICRA.2019.8794195"},{"key":"2068_CR7","doi-asserted-by":"crossref","unstructured":"Vora, S., Lang, A.H., Helou, B., Beijbom, O.: Pointpainting: Sequential fusion for 3d object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4604\u20134612 (2020)","DOI":"10.1109\/CVPR42600.2020.00466"},{"key":"2068_CR8","doi-asserted-by":"crossref","unstructured":"Song, S., Xiao, J.: Sliding shapes for 3d object detection in depth images. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part VI 13, pp. 634\u2013651 (2014). Springer","DOI":"10.1007\/978-3-319-10599-4_41"},{"key":"2068_CR9","doi-asserted-by":"crossref","unstructured":"Shao, Y., Tan, A., Sun, Z., Zheng, E., Yan, T., Liao, P.: Pv-ssd: A multi-modal point cloud 3d object detector based on projection features and voxel features. IEEE Trans. Emerg. Top. Comput. Intell. (2024)","DOI":"10.1109\/TETCI.2024.3389710"},{"key":"2068_CR10","doi-asserted-by":"crossref","unstructured":"Li, Y., Ge, Z., Yu, G., Yang, J., Wang, Z., Shi, Y., Sun, J., Li, Z.: Bevdepth: Acquisition of reliable depth for multi-view 3d object detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 37, pp. 1477\u20131485 (2023)","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"2068_CR11","doi-asserted-by":"crossref","unstructured":"Khami, A., Neumann, I., Vogel, S.: Intrinsic and extrinsic calibration of a uav-based multi-sensor system. J. Appl. Geodesy (0) (2025)","DOI":"10.1515\/jag-2024-0016"},{"key":"2068_CR12","unstructured":"Ye, T., Jing, W., Hu, C., Huang, S., Gao, L., Li, F., Wang, J., Guo, K., Xiao, W., Mao, W., et al.: Fusionad: Multi-modality fusion for prediction and planning tasks of autonomous driving. arXiv:2308.01006 (2023)"},{"key":"2068_CR13","unstructured":"Li, J., Li, H., Liu, J., Zou, Z., Ye, X., Wang, F., Huang, J., Wu, H., Wang, H.: Exploring the causality of end-to-end autonomous driving. arXiv:2407.06546 (2024)"},{"key":"2068_CR14","unstructured":"Zhang, L., Liu, Z., Zhu, X., Song, Z., Yang, X., Lei, Z., Qiao, H.: Weakly aligned feature fusion for multimodal object detection. IEEE Trans. Neural Netw. Learn. Syst. (2021)"},{"issue":"4","key":"2068_CR15","doi-asserted-by":"publisher","first-page":"982","DOI":"10.3390\/rs15040982","volume":"15","author":"R Mahmoudi Kouhi","year":"2023","unstructured":"Mahmoudi Kouhi, R., Daniel, S., Gigu\u00e8re, P.: Data preparation impact on semantic segmentation of 3d mobile lidar point clouds using deep neural networks. Rem Sens 15(4), 982 (2023)","journal-title":"Rem Sens"},{"key":"2068_CR16","doi-asserted-by":"crossref","unstructured":"Caesar, H., Bankiti, V., Lang, A.H., Vora, S., Liong, V.E., Xu, Q., Krishnan, A., Pan, Y., Baldan, G., Beijbom, O.: nuscenes: A multimodal dataset for autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11621\u201311631 (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"2068_CR17","doi-asserted-by":"crossref","unstructured":"Behley, J., Garbade, M., Milioto, A., Quenzel, J., Behnke, S., Stachniss, C., Gall, J.: Semantickitti: A dataset for semantic scene understanding of lidar sequences. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9297\u20139307 (2019)","DOI":"10.1109\/ICCV.2019.00939"},{"key":"2068_CR18","doi-asserted-by":"crossref","unstructured":"Gao, J., Ma, X., Xu, C.: Learning transferable conceptual prototypes for interpretable unsupervised domain adaptation. IEEE Trans. Image Process. (2024)","DOI":"10.1109\/TIP.2024.3459626"},{"issue":"6","key":"2068_CR19","doi-asserted-by":"publisher","first-page":"3957","DOI":"10.1007\/s00371-024-03640-8","volume":"41","author":"G Yue","year":"2025","unstructured":"Yue, G., Jiao, G., Li, C., Xiang, J.: When cnn meet with vit: decision-level feature fusion for camouflaged object detection. Vis. Comput. 41(6), 3957\u20133972 (2025)","journal-title":"Vis. Comput."},{"key":"2068_CR20","doi-asserted-by":"crossref","unstructured":"Yue, G., Jiao, G., Xiang, J.: Semi-supervised iterative learning network for camouflaged object detection. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 (2025). IEEE","DOI":"10.1109\/ICASSP49660.2025.10890224"},{"key":"2068_CR21","doi-asserted-by":"crossref","unstructured":"Wang, F., Jiao, G., Yue, G.: More observation leads to more clarity: Multi-view collaboration network for camouflaged object detection. Neurocomputing, 130433 (2025)","DOI":"10.1016\/j.neucom.2025.130433"},{"key":"2068_CR22","doi-asserted-by":"crossref","unstructured":"Gao, J., Chen, M., Xu, C.: Learning probabilistic presence-absence evidence for weakly-supervised audio-visual event perception. IEEE Trans. Patt. Anal. Mach. Intell. (2025)","DOI":"10.1109\/TPAMI.2025.3546312"},{"key":"2068_CR23","doi-asserted-by":"crossref","unstructured":"Gao, J., Yang, K., Yao, X., Hu, Y.: Unity in diversity: Video editing via gradient-latent purification. In: Proceedings of the Computer Vision and Pattern Recognition Conference, pp. 23401\u201323411 (2025)","DOI":"10.1109\/CVPR52734.2025.02179"},{"key":"2068_CR24","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: Pointnet: Deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 652\u2013660 (2017)"},{"key":"2068_CR25","doi-asserted-by":"crossref","unstructured":"Luo, W., Yang, B., Urtasun, R.: Fast and furious: Real time end-to-end 3d detection, tracking and motion forecasting with a single convolutional net. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3569\u20133577 (2018)","DOI":"10.1109\/CVPR.2018.00376"},{"key":"2068_CR26","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: Pointnet++: Deep hierarchical feature learning on point sets in a metric space. Adv. Neural Inform. Process. Syst. 30 (2017)"},{"key":"2068_CR27","doi-asserted-by":"crossref","unstructured":"Choy, C., Gwak, J., Savarese, S.: 4d spatio-temporal convnets: Minkowski convolutional neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3075\u20133084 (2019)","DOI":"10.1109\/CVPR.2019.00319"},{"issue":"10","key":"2068_CR28","doi-asserted-by":"publisher","first-page":"3337","DOI":"10.3390\/s18103337","volume":"18","author":"Y Yan","year":"2018","unstructured":"Yan, Y., Mao, Y., Li, B.: Second: Sparsely embedded convolutional detection. Sensors 18(10), 3337 (2018)","journal-title":"Sensors"},{"key":"2068_CR29","unstructured":"Zhou, H., Zhu, X., Song, X., Ma, Y., Wang, Z., Li, H., Lin, D.: Cylinder3d: An effective 3d framework for driving-scene lidar semantic segmentation. arXiv:2008.01550 (2020)"},{"key":"2068_CR30","doi-asserted-by":"crossref","unstructured":"Lai, X., Chen, Y., Lu, F., Liu, J., Jia, J.: Spherical transformer for lidar-based 3d recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17545\u201317555 (2023)","DOI":"10.1109\/CVPR52729.2023.01683"},{"key":"2068_CR31","doi-asserted-by":"crossref","unstructured":"Xu, J., Zhang, R., Dou, J., Zhu, Y., Sun, J., Pu, S.: Rpvnet: A deep and efficient range-point-voxel fusion network for lidar point cloud segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16024\u201316033 (2021)","DOI":"10.1109\/ICCV48922.2021.01572"},{"key":"2068_CR32","doi-asserted-by":"crossref","unstructured":"Wu, X., Jiang, L., Wang, P.-S., Liu, Z., Liu, X., Qiao, Y., Ouyang, W., He, T., Zhao, H.: Point transformer v3: Simpler faster stronger. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4840\u20134851 (2024)","DOI":"10.1109\/CVPR52733.2024.00463"},{"key":"2068_CR33","doi-asserted-by":"crossref","unstructured":"El\u00a0Madawi, K., Rashed, H., El\u00a0Sallab, A., Nasr, O., Kamel, H., Yogamani, S.: Rgb and lidar fusion based 3d semantic segmentation for autonomous driving. In: 2019 IEEE Intelligent Transportation Systems Conference (ITSC), pp. 7\u201312 (2019). IEEE","DOI":"10.1109\/ITSC.2019.8917447"},{"key":"2068_CR34","doi-asserted-by":"crossref","unstructured":"Zhuang, Z., Li, R., Jia, K., Wang, Q., Li, Y., Tan, M.: Perception-aware multi-sensor fusion for 3d lidar semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16280\u201316290 (2021)","DOI":"10.1109\/ICCV48922.2021.01597"},{"key":"2068_CR35","doi-asserted-by":"crossref","unstructured":"Li, J., Dai, H., Han, H., Ding, Y.: Mseg3d: Multi-modal 3d semantic segmentation for autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 21694\u201321704 (2023)","DOI":"10.1109\/CVPR52729.2023.02078"},{"key":"2068_CR36","doi-asserted-by":"crossref","unstructured":"Yan, X., Gao, J., Zheng, C., Zheng, C., Zhang, R., Cui, S., Li, Z.: 2dpass: 2d priors assisted semantic segmentation on lidar point clouds. In: European Conference on Computer Vision, pp. 677\u2013695 (2022). Springer","DOI":"10.1007\/978-3-031-19815-1_39"},{"key":"2068_CR37","doi-asserted-by":"publisher","DOI":"10.1016\/j.conbuildmat.2021.126265","volume":"322","author":"F Liu","year":"2022","unstructured":"Liu, F., Wang, L.: Unet-based model for crack detection integrating visual explanations. Constr. Build. Mater. 322, 126265 (2022)","journal-title":"Constr. Build. Mater."},{"key":"2068_CR38","doi-asserted-by":"crossref","unstructured":"Tang, H., Liu, Z., Zhao, S., Lin, Y., Lin, J., Wang, H., Han, S.: Searching efficient 3d architectures with sparse point-voxel convolution. In: European Conference on Computer Vision, pp. 685\u2013702 (2020). Springer","DOI":"10.1007\/978-3-030-58604-1_41"},{"key":"2068_CR39","unstructured":"Gu, A., Dao, T.: Mamba: Linear-time sequence modeling with selective state spaces. arXiv:2312.00752 (2023)"},{"issue":"3","key":"2068_CR40","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/s41019-023-00226-7","volume":"8","author":"S Xiao","year":"2023","unstructured":"Xiao, S., Zhu, D., Tang, C., Huang, Z.: Combining graph contrastive embedding and multi-head cross-attention transfer for cross-domain recommendation. Data Sci. Eng. 8(3), 247\u2013262 (2023)","journal-title":"Data Sci. Eng."},{"key":"2068_CR41","doi-asserted-by":"crossref","unstructured":"Ding, Y., Tong, C., Zhang, S., Jiang, M., Li, Y., Lim, K.J., Guan, C.: Emt: A novel transformer for generalized cross-subject eeg emotion recognition. IEEE Trans. Neural Netw. Learn. Syst. (2025)","DOI":"10.1109\/TNNLS.2025.3552603"},{"key":"2068_CR42","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, Y., Cui, Z.: Decoupled multimodal distilling for emotion recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6631\u20136640 (2023)","DOI":"10.1109\/CVPR52729.2023.00641"},{"key":"2068_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhou, Z., David, P., Yue, X., Xi, Z., Gong, B., Foroosh, H.: Polarnet: An improved grid representation for online lidar point clouds semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9601\u20139610 (2020)","DOI":"10.1109\/CVPR42600.2020.00962"},{"key":"2068_CR44","doi-asserted-by":"crossref","unstructured":"Cheng, R., Razani, R., Taghavi, E., Li, E., Liu, B.: 2-s3net: Attentive feature fusion with adaptive feature selection for sparse semantic segmentation network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12547\u201312556 (2021)","DOI":"10.1109\/CVPR46437.2021.01236"},{"key":"2068_CR45","doi-asserted-by":"crossref","unstructured":"Genova, K., Yin, X., Kundu, A., Pantofaru, C., Cole, F., Sud, A., Brewington, B., Shucker, B., Funkhouser, T.: Learning 3d semantic segmentation with only 2d image supervision. In: 2021 International Conference on 3D Vision (3DV), pp. 361\u2013372 (2021). IEEE","DOI":"10.1109\/3DV53792.2021.00046"},{"key":"2068_CR46","doi-asserted-by":"crossref","unstructured":"Thomas, H., Qi, C.R., Deschaud, J.-E., Marcotegui, B., Goulette, F., Guibas, L.J.: Kpconv: Flexible and deformable convolution for point clouds. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6411\u20136420 (2019)","DOI":"10.1109\/ICCV.2019.00651"},{"key":"2068_CR47","doi-asserted-by":"crossref","unstructured":"Hou, Y., Zhu, X., Ma, Y., Loy, C.C., Li, Y.: Point-to-voxel knowledge distillation for lidar semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8479\u20138488 (2022)","DOI":"10.1109\/CVPR52688.2022.00829"},{"key":"2068_CR48","doi-asserted-by":"crossref","unstructured":"Xu, X., Kong, L., Shuai, H., Liu, Q.: Frnet: Frustum-range networks for scalable lidar segmentation. IEEE Trans. Image Process. (2025)","DOI":"10.1109\/TIP.2025.3550011"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02068-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02068-x","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02068-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T04:20:36Z","timestamp":1770783636000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02068-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,4]]},"references-count":48,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["2068"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02068-x","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,4]]},"assertion":[{"value":"6 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"4"}}