{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T00:47:12Z","timestamp":1774658832335,"version":"3.50.1"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031703775","type":"print"},{"value":"9783031703782","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70378-2_25","type":"book-chapter","created":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T09:02:05Z","timestamp":1725181325000},"page":"401-417","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Leveraging Foundation Models for\u00a0Multi-modal Federated Learning with\u00a0Incomplete Modality"],"prefix":"10.1007","author":[{"given":"Liwei","family":"Che","sequence":"first","affiliation":[]},{"given":"Jiaqi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xinyue","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Fenglong","family":"Ma","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"25_CR1","doi-asserted-by":"crossref","unstructured":"Che, L., Long, Z., Wang, J., Wang, Y., Xiao, H., Ma, F.: Fedtrinet: a pseudo labeling method with three players for federated semi-supervised learning. In: 2021 IEEE Big Data, pp. 715\u2013724 (2021)","DOI":"10.1109\/BigData52589.2021.9671374"},{"key":"25_CR2","doi-asserted-by":"crossref","unstructured":"Che, L., Wang, J., Zhou, Y., Ma, F.: Multimodal federated learning: A survey. Sensors 23(15) (2023)","DOI":"10.3390\/s23156986"},{"key":"25_CR3","doi-asserted-by":"crossref","unstructured":"Chen, J., Zhang, A.: Fedmsplit: Correlation-adaptive federated multi-task learning across multimodal split networks. In: ACM SIGKDD, pp. 87-96 (2022)","DOI":"10.1145\/3534678.3539384"},{"key":"25_CR4","unstructured":"Chen, J., Xu, W., Guo, S., Wang, J., Zhang, J., Wang, H.: Fedtune: a deep dive into efficient federated fine-tuning with pre-trained transformers (2022)"},{"key":"25_CR5","doi-asserted-by":"crossref","unstructured":"Chen, S., Li, B.: Towards optimal multi-modal federated learning on non-iid data with hierarchical gradient blending. In: IEEE INFOCOM 2022-IEEE Conference on Computer Communications, pp. 1469\u20131478. IEEE (2022)","DOI":"10.1109\/INFOCOM48880.2022.9796724"},{"key":"25_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2022.102585","volume":"82","author":"BM Cobbinah","year":"2022","unstructured":"Cobbinah, B.M., et al.: Reducing variations in multi-center alzheimer\u2019s disease classification with convolutional adversarial autoencoder. Med. Image Anal. 82, 102585 (2022)","journal-title":"Med. Image Anal."},{"key":"25_CR7","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv:1810.04805 (2018)"},{"key":"25_CR8","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"25_CR9","unstructured":"Guo, T., Guo, S., Wang, J., Xu, W.: Promptfl: Let federated participants cooperatively learn prompts instead of models\u2013federated learning in age of foundation model. arXiv preprint arXiv:2208.11625 (2022)"},{"issue":"2","key":"25_CR10","doi-asserted-by":"publisher","first-page":"520","DOI":"10.1109\/TCSVT.2019.2892802","volume":"30","author":"X He","year":"2019","unstructured":"He, X., Peng, Y.: Fine-grained visual-textual representation learning. IEEE Trans. Circuits Syst. Video Technol. 30(2), 520\u2013531 (2019)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"25_CR11","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network (2015)"},{"key":"25_CR12","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"key":"25_CR13","unstructured":"Kornblith, S., Norouzi, M., Lee, H., Hinton, G.: Similarity of neural network representations revisited. In: International Conference on Machine Learning, pp. 3519\u20133529. PMLR (2019)"},{"key":"25_CR14","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)"},{"key":"25_CR15","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In: International Conference on Machine Learning, pp. 12888\u201312900. PMLR (2022)"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"Li, W., et al.: Unimo: towards unified-modal understanding and generation via cross-modal contrastive learning. arXiv preprint arXiv:2012.15409 (2020)","DOI":"10.18653\/v1\/2021.acl-long.202"},{"key":"25_CR17","unstructured":"Liang, W., Zhang, Y., Kwon, Y., Yeung, S., Zou, J.: Mind the gap: Understanding the modality gap in multi-modal contrastive representation learning. arXiv preprint arXiv:2203.02053 (2022)"},{"key":"25_CR18","doi-asserted-by":"crossref","unstructured":"Liu, F., Wu, X., Ge, S., Fan, W., Zou, Y.: Federated learning for vision-and-language grounding problems. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 11572\u201311579 (2020)","DOI":"10.1609\/aaai.v34i07.6824"},{"key":"25_CR19","unstructured":"Lu, W., Hu, X., Wang, J., Xie, X.: Fedclip: Fast generalization and personalization for clip in federated learning. arXiv preprint arXiv:2302.13485 (2023)"},{"key":"25_CR20","doi-asserted-by":"crossref","unstructured":"Ma, M., Ren, J., Zhao, L., Testuggine, D., Peng, X.: Are multimodal transformers robust to missing modality? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18177\u201318186 (June 2022)","DOI":"10.1109\/CVPR52688.2022.01764"},{"key":"25_CR21","doi-asserted-by":"crossref","unstructured":"Ma, M., Ren, J., Zhao, L., Testuggine, D., Peng, X.: Are multimodal transformers robust to missing modality? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18177\u201318186 (2022)","DOI":"10.1109\/CVPR52688.2022.01764"},{"key":"25_CR22","doi-asserted-by":"crossref","unstructured":"Ma, M., Ren, J., Zhao, L., Tulyakov, S., Wu, C., Peng, X.: Smil: multimodal learning with severely missing modality. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 2302\u20132310 (2021)","DOI":"10.1609\/aaai.v35i3.16330"},{"key":"25_CR23","unstructured":"McMahan, B., Moore, E., Ramage, D., Hampson, S., y\u00a0Arcas, B.A.: Communication-efficient learning of deep networks from decentralized data. In: Artificial Intelligence and Statistics, pp. 1273\u20131282. PMLR (2017)"},{"key":"25_CR24","doi-asserted-by":"crossref","unstructured":"Nilsback, M.E., Zisserman, A.: Automated flower classification over a large number of classes. In: Indian Conference on Computer Vision, Graphics and Image Processing (Dec 2008)","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"25_CR25","doi-asserted-by":"crossref","unstructured":"Pandey, G., Dukkipati, A.: Variational methods for conditional multimodal deep learning. In: 2017 International Joint Conference on Neural Networks (IJCNN), pp. 308\u2013315. IEEE (2017)","DOI":"10.1109\/IJCNN.2017.7965870"},{"key":"25_CR26","unstructured":"Poklukar, P., Vasco, M., Yin, H., Melo, F.S., Paiva, A., Kragic, D.: Geometric multimodal contrastive representation learning. In: International Conference on Machine Learning, pp. 17782\u201317800. PMLR (2022)"},{"key":"25_CR27","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International conference on machine learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"25_CR28","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv:2204.06125 (2022)"},{"key":"25_CR29","unstructured":"Shi, Y., Paige, B., Torr, P.H., Siddharth, N.: Relating by contrasting: A data-efficient framework for multimodal generative models. arXiv preprint arXiv:2007.01179 (2020)"},{"key":"25_CR30","unstructured":"Suzuki, M., Nakayama, K., Matsuo, Y.: Joint multimodal learning with deep generative models. arXiv preprint arXiv:1611.01891 (2016)"},{"key":"25_CR31","unstructured":"Tan, Y., Long, G., Ma, J., Liu, L., Zhou, T., Jiang, J.: Federated learning from pre-trained models: A contrastive learning approach. arXiv:2209.10083 (2022)"},{"key":"25_CR32","doi-asserted-by":"crossref","unstructured":"Tian, Y., Wan, Y., Lyu, L., Yao, D., Jin, H., Sun, L.: Fedbert: When federated learning meets pre-training. ACM Trans. Intell. Syst. Technol. 13(4) (2022)","DOI":"10.1145\/3510033"},{"key":"25_CR33","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., Bai, S., Liang, P.P., Kolter, J.Z., Morency, L.P., Salakhutdinov, R.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the Conference. Association for Computational Linguistics. Meeting, vol.\u00a02019, p.\u00a06558. NIH Public Access (2019)","DOI":"10.18653\/v1\/P19-1656"},{"key":"25_CR34","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., Belongie, S.: Caltech-ucsd birds-200-2011 (cub-200-2011). Tech. rep. (2011)"},{"key":"25_CR35","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, Y., Wu, Y., Das, M., Yang, H., Ma, F.: Rethinking personalized federated learning with clustering-based dynamic graph propagation. In: Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 155\u2013167 (2024)","DOI":"10.1007\/978-981-97-2259-4_12"},{"key":"25_CR36","doi-asserted-by":"publisher","unstructured":"Wang, J., Qian, C., Cui, S., Glass, L., Ma, F.: Towards federated covid-19 vaccine side effect prediction. In: Joint European Conference on Machine Learning and Knowledge Discovery in Databases, pp. 437\u2013452. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-26422-1_27","DOI":"10.1007\/978-3-031-26422-1_27"},{"key":"25_CR37","unstructured":"Wang, J., et al.: Towards personalized federated learning via heterogeneous model reassembly. Adv. Neural Inform. Process. Syst. 36 (2024)"},{"key":"25_CR38","doi-asserted-by":"crossref","unstructured":"Wang, J., Zeng, S., Long, Z., Wang, Y., Xiao, H., Ma, F.: Knowledge-enhanced semi-supervised federated learning for aggregating heterogeneous lightweight clients in iot. In: Proceedings of the 2023 SIAM International Conference on Data Mining (SDM), pp. 496\u2013504. SIAM (2023)","DOI":"10.1137\/1.9781611977653.ch56"},{"key":"25_CR39","unstructured":"Wu, M., Goodman, N.: Multimodal generative models for scalable weakly-supervised learning. Adv. Neural Inform. Process. Syst. 31 (2018)"},{"key":"25_CR40","doi-asserted-by":"crossref","unstructured":"Wu, X., Huang, F., Hu, Z., Huang, H.: Faster adaptive federated learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 37(9), pp. 10379\u201310387 (2023)","DOI":"10.1609\/aaai.v37i9.26235"},{"key":"25_CR41","unstructured":"Wu, X., Lin, W.Y., Willmott, D., Condessa, F., Huang, Y., Li, Z., Ganesh, M.R.: Leveraging foundation models to improve lightweight clients in federated learning (2023)"},{"key":"25_CR42","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1016\/j.neucom.2022.01.063","volume":"480","author":"B Xiong","year":"2022","unstructured":"Xiong, B., Yang, X., Qi, F., Xu, C.: A unified framework for multi-modal federated learning. Neurocomputing 480, 110\u2013118 (2022)","journal-title":"Neurocomputing"},{"key":"25_CR43","doi-asserted-by":"crossref","unstructured":"Yang, X., Xiong, B., Huang, Y., Xu, C.: Cross-modal federated human activity recognition via modality-agnostic and modality-specific representation learning (2022)","DOI":"10.1609\/aaai.v36i3.20213"},{"key":"25_CR44","unstructured":"Yu, Q., Liu, Y., Wang, Y., Xu, K., Liu, J.: Multimodal federated learning via contrastive representation ensemble. In: ICLR (2023)"},{"key":"25_CR45","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Barnaghi, P., Haddadi, H.: Multimodal federated learning on iot data. In: 2022 IEEE\/ACM Seventh International Conference on Internet-of-Things Design and Implementation (IoTDI), pp. 43\u201354. IEEE (2022)","DOI":"10.1109\/IoTDI54339.2022.00011"},{"key":"25_CR46","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Wu, J., Wang, H., He, J.: Adversarial robustness through bias variance decomposition: a new perspective for federated learning. In: CIKM, pp. 2753\u20132762. ACM (2022)","DOI":"10.1145\/3511808.3557232"},{"key":"25_CR47","doi-asserted-by":"crossref","unstructured":"Zolfaghari, M., Zhu, Y., Gehler, P., Brox, T.: Crossclr: Cross-modal contrastive learning for multi-modal video representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1450\u20131459 (2021)","DOI":"10.1109\/ICCV48922.2021.00148"},{"key":"25_CR48","doi-asserted-by":"crossref","unstructured":"Zong, L., Xie, Q., Zhou, J., Wu, P., Zhang, X., Xu, B.: Fedcmr: federated cross-modal retrieval. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 1672\u20131676 (2021)","DOI":"10.1145\/3404835.3462989"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70378-2_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T09:07:24Z","timestamp":1725181644000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70378-2_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031703775","9783031703782"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70378-2_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vilnius","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}