{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T15:52:45Z","timestamp":1759333965925,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032060778"},{"type":"electronic","value":"9783032060785"}],"license":[{"start":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T00:00:00Z","timestamp":1759190400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T00:00:00Z","timestamp":1759190400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06078-5_28","type":"book-chapter","created":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T08:58:12Z","timestamp":1759222692000},"page":"492-508","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Revisiting Cross-Modal Knowledge Distillation: A Disentanglement Approach for\u00a0RGBD Semantic Segmentation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7932-8249","authenticated-orcid":false,"given":"Roger","family":"Ferrod","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1934-0625","authenticated-orcid":false,"given":"C\u00e1ssio F.","family":"Dantas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7570-637X","authenticated-orcid":false,"given":"Luigi","family":"Di Caro","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8736-3132","authenticated-orcid":false,"given":"Dino","family":"Ienco","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,30]]},"reference":[{"key":"28_CR1","unstructured":"Xue, Z., Gao, Z., Ren, S., Zhao, H.: The modality focusing hypothesis: towards understanding crossmodal knowledge distillation. In: ICLR (2022)"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Bucila, C., Caruana, R., Niculescu-Mizil, A.: Model compression. In: KDD (2006)","DOI":"10.1145\/1150402.1150464"},{"key":"28_CR3","unstructured":"Hinton, G.E., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. ArXiv, vol. abs\/1503.02531 (2015)"},{"key":"28_CR4","doi-asserted-by":"crossref","unstructured":"Jin, Y., Wang, J., Lin, D.: Multi-level logit distillation. In: CVPR, pp. 24276\u201324285 (2023)","DOI":"10.1109\/CVPR52729.2023.02325"},{"key":"28_CR5","doi-asserted-by":"crossref","unstructured":"Guo, Z., Yan, H., Li, H., Lin, X.L.: Class attention transfer based knowledge distillation. CVPR, pp. 11868\u201311877 (2023)","DOI":"10.1109\/CVPR52729.2023.01142"},{"key":"28_CR6","unstructured":"Huang, T., You, S., Wang, F., Qian, C., Xu, C.: Knowledge distillation from a stronger teacher. ArXiv, vol. abs\/2205.10536 (2022)"},{"key":"28_CR7","doi-asserted-by":"crossref","unstructured":"Liu, B., et al.: Lite-MKD: a multi-modal knowledge distillation framework for lightweight few-shot action recognition. In: ACM Multimedia (2023)","DOI":"10.1145\/3581783.3612279"},{"key":"28_CR8","doi-asserted-by":"publisher","first-page":"103352","DOI":"10.1016\/j.cviu.2021.103352","volume":"216","author":"FM Hafner","year":"2018","unstructured":"Hafner, F.M., Bhuyian, A.H., Kooij, J.F.P., Granger, E.: Cross-modal distillation for RGB-depth person re-identification. Comput. Vis. Image Underst. 216, 103352 (2018)","journal-title":"Comput. Vis. Image Underst."},{"key":"28_CR9","doi-asserted-by":"crossref","unstructured":"Hu, M., et al.: Knowledge distillation from multi-modal to mono-modal segmentation networks. ArXiv, vol. abs\/2106.09564 (2020)","DOI":"10.1007\/978-3-030-59710-8_75"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Hu, X., Yang, K., Fei, L., Wang, K.: ACNet: attention based network to exploit complementary features for RGBD semantic segmentation. In: ICIP, pp. 1440\u20131444 (2019)","DOI":"10.1109\/ICIP.2019.8803025"},{"key":"28_CR11","unstructured":"Xu, X., Kong, L., Shuai, H., Liu, Q.: FRNet: frustum-range networks for scalable lidar segmentation. ArXiv, vol. abs\/2312.04484 (2023)"},{"key":"28_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-319-54181-5_14","volume-title":"Computer Vision \u2013 ACCV 2016","author":"C Hazirbas","year":"2017","unstructured":"Hazirbas, C., Ma, L., Domokos, C., Cremers, D.: FuseNet: incorporating depth into semantic segmentation via fusion-based CNN architecture. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016. LNCS, vol. 10111, pp. 213\u2013228. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54181-5_14"},{"key":"28_CR13","unstructured":"Couprie, C., Farabet, C., Najman, L., LeCun, Y.: Indoor semantic segmentation using depth information. arXiv (2013)"},{"key":"28_CR14","doi-asserted-by":"publisher","first-page":"1481","DOI":"10.1109\/TCSVT.2023.3296162","volume":"34","author":"J Yang","year":"2023","unstructured":"Yang, J., Bai, L., Sun, Y., Tian, C., Mao, M., Wang, G.: Pixel difference convolutional network for RGB-D semantic segmentation. IEEE Trans. Circ. Sys. Video Tech. 34, 1481\u20131492 (2023)","journal-title":"IEEE Trans. Circ. Sys. Video Tech."},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Lee, S., Park, S.J., Hong, K.S.: RDFNet: RGB-D multi-level residual feature fusion for indoor semantic segmentation. In: ICCV, pp. 4990\u20134999 (2017)","DOI":"10.1109\/ICCV.2017.533"},{"key":"28_CR16","unstructured":"Jiang, J., Zheng, L., Luo, F., Zhang, Z.: RedNet: residual encoder-decoder network for indoor RGB-D semantic segmentation. ArXiv, vol. abs\/1806.01054 (2018)"},{"key":"28_CR17","doi-asserted-by":"publisher","first-page":"6348","DOI":"10.1109\/TMM.2023.3349072","volume":"26","author":"Y Lv","year":"2024","unstructured":"Lv, Y., Liu, Z., Li, G.: Context-aware interaction network for RGB-T semantic segmentation. IEEE Trans. Multimed. 26, 6348\u20136360 (2024)","journal-title":"IEEE Trans. Multimed."},{"issue":"3","key":"28_CR18","doi-asserted-by":"publisher","first-page":"1000","DOI":"10.1109\/TASE.2020.2993143","volume":"18","author":"Y Sun","year":"2021","unstructured":"Sun, Y., Zuo, W., Yun, P., Wang, H., Liu, M.: FuseSeg: Semantic segmentation of urban scenes based on RGB and thermal data fusion. IEEE Trans. Autom. Sci. Eng. 18(3), 1000\u20131011 (2021)","journal-title":"IEEE Trans. Autom. Sci. Eng."},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Yang, Z., Li, Z., Shao, M., Shi, D., Yuan, Z., Yuan, C.: Masked generative distillation. In: ECCV (2022)","DOI":"10.1007\/978-3-031-20083-0_4"},{"key":"28_CR20","doi-asserted-by":"crossref","unstructured":"Wang, S., Yan, Z., Zhang, D., Wei, H., Li, Z., Li, R.: Prototype knowledge distillation for medical segmentation with missing modality. In: ICASSP (2023)","DOI":"10.1109\/ICASSP49357.2023.10095014"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Liu, T., Chen, C., Yang, X., Tan, W.: Rethinking knowledge distillation with raw features for semantic segmentation. In: WACV, pp. 1144\u20131153 (2024)","DOI":"10.1109\/WACV57701.2024.00119"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Wang, X., Chen, H., Wu, Z., Zhu, W., et al.: Disentangled representation learning. IEEE Trans. Pattern Anal. Mach. Intell. (2024)","DOI":"10.1109\/TPAMI.2024.3420937"},{"key":"28_CR23","unstructured":"Tsai, Y.H.H., Pu Liang, P., Zadeh, A, Morency, L.P., Salakhutdinov, R.: Learning factorized multimodal representations. ArXiv, vol. abs\/1806.06176 (2018)"},{"issue":"10","key":"28_CR24","doi-asserted-by":"publisher","first-page":"7956","DOI":"10.1109\/TNNLS.2022.3147546","volume":"34","author":"Y Zhang","year":"2023","unstructured":"Zhang, Y., Zhang, Y., Guo, W., Cai, X., Yuan, X.: Learning disentangled representation for multimodal cross-domain sentiment analysis. IEEE Trans. Neural Net. Learn. Syst. 34(10), 7956\u20137966 (2023)","journal-title":"IEEE Trans. Neural Net. Learn. Syst."},{"key":"28_CR25","doi-asserted-by":"crossref","unstructured":"Xu, Z., et al.: Predict, prevent, and evaluate: Disentangled text-driven image manipulation empowered by pre-trained vision-language model. In: CVPR, pp. 18208\u201318217 (2021)","DOI":"10.1109\/CVPR52688.2022.01769"},{"key":"28_CR26","doi-asserted-by":"crossref","unstructured":"Yu, Y., et al.: Towards counterfactual image manipulation via CLIP. In: ACM Multimedia (2022)","DOI":"10.1145\/3503161.3547935"},{"key":"28_CR27","doi-asserted-by":"crossref","unstructured":"Materzy\u0144ska, J., Torralba, A., Bau, D.: Disentangling visual and written concepts in CLIP. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01592"},{"key":"28_CR28","unstructured":"Ienco, D., Dantas, C.F.: Discom-kd: cross-modal knowledge distillation via disentanglement representation and adversarial learning. In: BMVC (2024)"},{"key":"28_CR29","unstructured":"Zhang, H., Ciss\u00e9, M., Dauphin, Y., Lopez-Paz, D.: Mixup: beyond empirical risk minimization. ArXiv, vol. abs\/1710.09412 (2017)"},{"key":"28_CR30","unstructured":"van\u00a0den Oord, A., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. ArXiv, vol. abs\/1807.03748 (2018)"},{"key":"28_CR31","unstructured":"Wan, Z., et al.: Sigma: siamese mamba network for multi-modal semantic segmentation. ArXiv, vol. abs\/2404.04256 (2024)"},{"key":"28_CR32","doi-asserted-by":"crossref","unstructured":"Kohli, P. Silberman, N., Hoiem, D., Fergus, R.: Indoor segmentation and support inference from RGBD images. In: ECCV, 2012","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"28_CR33","doi-asserted-by":"crossref","unstructured":"Girdhar, R., Singh, M, Ravi, N., van\u00a0der Maaten, L., Joulin, A., Misra, I.: Omnivore: A single model for many visual modalities. In: CVPR, pp. 16081\u201316091 (2022)","DOI":"10.1109\/CVPR52688.2022.01563"},{"key":"28_CR34","unstructured":"ISPRS. Urban modelling and semantic labelling benchmark (2024). https:\/\/www.isprs.org\/education\/benchmarks\/UrbanSemLab\/default.aspx"},{"key":"28_CR35","unstructured":"Kieu, N. Nguyen, K., Sridharan, S., Fookes, C.: General-purpose multimodal transformer meets remote sensing semantic segmentation. ArXiv, vol. abs\/2307.03388 (2023)"},{"key":"28_CR36","doi-asserted-by":"crossref","unstructured":"Fonder, M., Droogenbroeck, M.: Mid-air: a multi-modal dataset for extremely low altitude drone flights. In: CVPRW (2019)","DOI":"10.1109\/CVPRW.2019.00081"},{"key":"28_CR37","unstructured":"Lopez-Paz, D., Bottou, L., Scholkopf, B., Naumovich Vapnik, V.: Unifying distillation and privileged information. ArXiv, vol. abs\/1511.03643 (2015)"},{"key":"28_CR38","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"28_CR39","unstructured":"Xue, Z., Gao, Z., Ren, S., Zhao, H.: The modality focusing hypothesis: towards understanding crossmodal knowledge distillation. In: ICLR (2023)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06078-5_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T08:58:23Z","timestamp":1759222703000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06078-5_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,30]]},"ISBN":["9783032060778","9783032060785"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06078-5_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,30]]},"assertion":[{"value":"30 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}