{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T15:01:29Z","timestamp":1781276489560,"version":"3.54.1"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730030","type":"print"},{"value":"9783031730047","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73004-7_11","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T17:02:14Z","timestamp":1730394134000},"page":"178-195","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Overcome Modal Bias in\u00a0Multi-modal Federated Learning via\u00a0Balanced Modality Selection"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2277-5355","authenticated-orcid":false,"given":"Yunfeng","family":"Fan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0983-387X","authenticated-orcid":false,"given":"Wenchao","family":"Xu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7591-5315","authenticated-orcid":false,"given":"Haozhao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1030-7834","authenticated-orcid":false,"given":"Fushuo","family":"Huo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3502-0146","authenticated-orcid":false,"given":"Jinyu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9831-2202","authenticated-orcid":false,"given":"Song","family":"Guo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"11_CR1","doi-asserted-by":"crossref","unstructured":"Abavisani, M., Wu, L., Hu, S., Tetreault, J., Jaimes, A.: Multimodal categorization of crisis events in social media. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14679\u201314689 (2020)","DOI":"10.1109\/CVPR42600.2020.01469"},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Alfasly, S., Lu, J., Xu, C., Zou, Y.: Learnable irrelevant modality dropout for multimodal action recognition on modality-specific annotated videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20208\u201320217 (2022)","DOI":"10.1109\/CVPR52688.2022.01957"},{"key":"11_CR3","unstructured":"Balakrishnan, R., Li, T., Zhou, T., Himayat, N., Smith, V., Bilmes, J.: Diverse client selection for federated learning via submodular maximization. In: International Conference on Learning Representations (2022)"},{"issue":"4","key":"11_CR4","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1109\/TAFFC.2014.2336244","volume":"5","author":"H Cao","year":"2014","unstructured":"Cao, H., Cooper, D.G., Keutmann, M.K., Gur, R.C., Nenkova, A., Verma, R.: CREMA-D: crowd-sourced emotional multimodal actors dataset. IEEE Trans. Affect. Comput. 5(4), 377\u2013390 (2014)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"11_CR5","doi-asserted-by":"crossref","unstructured":"Chen, J., Zhang, A.: FedMSplit: correlation-adaptive federated multi-task learning across multimodal split networks. In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 87\u201396 (2022)","DOI":"10.1145\/3534678.3539384"},{"key":"11_CR6","unstructured":"Cho, Y.J., Wang, J., Joshi, G.: Client selection in federated learning: Convergence analysis and power-of-choice selection strategies. arXiv preprint arXiv:2010.01243 (2020)"},{"key":"11_CR7","doi-asserted-by":"crossref","unstructured":"Cornuejols, G., Fisher, M., Nemhauser, G.L.: On the uncapacitated location problem. In: Annals of Discrete Mathematics, vol.\u00a01, pp. 163\u2013177. Elsevier (1977)","DOI":"10.1016\/S0167-5060(08)70732-5"},{"key":"11_CR8","doi-asserted-by":"crossref","unstructured":"Dai, Y., Chen, Z., Li, J., Heinecke, S., Sun, L., Xu, R.: Tackling data heterogeneity in federated learning with class prototypes. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 7314\u20137322 (2023)","DOI":"10.1609\/aaai.v37i6.25891"},{"issue":"8","key":"11_CR9","doi-asserted-by":"publisher","first-page":"1996","DOI":"10.1109\/TPDS.2021.3134647","volume":"33","author":"Y Deng","year":"2021","unstructured":"Deng, Y., et al.: Auction: automated and quality-aware client selection framework for efficient federated learning. IEEE Trans. Parallel Distrib. Syst. 33(8), 1996\u20132009 (2021)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Duan, J., Li, W., Zou, D., Li, R., Lu, S.: Federated learning with data-agnostic distribution fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8074\u20138083 (2023)","DOI":"10.1109\/CVPR52729.2023.00780"},{"key":"11_CR11","doi-asserted-by":"crossref","unstructured":"Fan, Y., Xu, W., Wang, H., Wang, J., Guo, S.: PMR: prototypical modal rebalance for multimodal learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20029\u201320038 (2023)","DOI":"10.1109\/CVPR52729.2023.01918"},{"key":"11_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"11_CR13","unstructured":"Hsu, T.M.H., Qi, H., Brown, M.: Measuring the effects of non-identical data distribution for federated visual classification. arXiv preprint arXiv:1909.06335 (2019)"},{"key":"11_CR14","unstructured":"Huang, Y., Lin, J., Zhou, C., Yang, H., Huang, L.: Modality competition: what makes joint training of multi-modal network fail in deep learning?(provably). In: International Conference on Machine Learning, pp. 9226\u20139259. PMLR (2022)"},{"issue":"1","key":"11_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/JIOT.2021.3095077","volume":"9","author":"A Imteaj","year":"2021","unstructured":"Imteaj, A., Thakker, U., Wang, S., Li, J., Amini, M.H.: A survey on federated learning for resource-constrained IoT devices. IEEE Internet Things J. 9(1), 1\u201324 (2021)","journal-title":"IEEE Internet Things J."},{"key":"11_CR16","unstructured":"Karimireddy, S.P., Kale, S., Mohri, M., Reddi, S., Stich, S., Suresh, A.T.: Scaffold: stochastic controlled averaging for federated learning. In: International Conference on Machine Learning, pp. 5132\u20135143. PMLR (2020)"},{"key":"11_CR17","doi-asserted-by":"crossref","unstructured":"Kim, B., Kim, H., Kim, K., Kim, S., Kim, J.: Learning not to learn: training deep neural networks with biased data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9012\u20139020 (2019)","DOI":"10.1109\/CVPR.2019.00922"},{"issue":"11","key":"11_CR18","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"11_CR19","doi-asserted-by":"crossref","unstructured":"Li, H., Li, X., Hu, P., Lei, Y., Li, C., Zhou, Y.: Boosting multi-modal model performance with adaptive gradient modulation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22214\u201322224 (2023)","DOI":"10.1109\/ICCV51070.2023.02030"},{"issue":"3","key":"11_CR20","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1109\/MSP.2020.2975749","volume":"37","author":"T Li","year":"2020","unstructured":"Li, T., Sahu, A.K., Talwalkar, A., Smith, V.: Federated learning: challenges, methods, and future directions. IEEE Signal Process. Mag. 37(3), 50\u201360 (2020)","journal-title":"IEEE Signal Process. Mag."},{"key":"11_CR21","first-page":"429","volume":"2","author":"T Li","year":"2020","unstructured":"Li, T., Sahu, A.K., Zaheer, M., Sanjabi, M., Talwalkar, A., Smith, V.: Federated optimization in heterogeneous networks. Proc. Mach. Learn. Syst. 2, 429\u2013450 (2020)","journal-title":"Proc. Mach. Learn. Syst."},{"key":"11_CR22","unstructured":"Li, X., Huang, K., Yang, W., Wang, S., Zhang, Z.: On the convergence of FedAvg on non-IID data. arXiv preprint arXiv:1907.02189 (2019)"},{"key":"11_CR23","unstructured":"McMahan, B., Moore, E., Ramage, D., Hampson, S., Arcas, B.A.: Communication-efficient learning of deep networks from decentralized data. In: Artificial Intelligence and Statistics, pp. 1273\u20131282. PMLR (2017)"},{"key":"11_CR24","doi-asserted-by":"crossref","unstructured":"Mirzasoleiman, B., Badanidiyuru, A., Karbasi, A., Vondr\u00e1k, J., Krause, A.: Lazier than lazy greedy. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a029 (2015)","DOI":"10.1609\/aaai.v29i1.9486"},{"key":"11_CR25","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/BF01588971","volume":"14","author":"GL Nemhauser","year":"1978","unstructured":"Nemhauser, G.L., Wolsey, L.A., Fisher, M.L.: An analysis of approximations for maximizing submodular set functions. Math. Program. 14, 265\u2013294 (1978)","journal-title":"Math. Program."},{"issue":"6","key":"11_CR26","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1109\/MCOM.001.1900461","volume":"58","author":"S Niknam","year":"2020","unstructured":"Niknam, S., Dhillon, H.S., Reed, J.H.: Federated learning for wireless communications: motivation, opportunities, and challenges. IEEE Commun. Mag. 58(6), 46\u201351 (2020)","journal-title":"IEEE Commun. Mag."},{"key":"11_CR27","doi-asserted-by":"crossref","unstructured":"Owens, A., Efros, A.A.: Audio-visual scene analysis with self-supervised multisensory features. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 631\u2013648 (2018)","DOI":"10.1007\/978-3-030-01231-1_39"},{"key":"11_CR28","doi-asserted-by":"crossref","unstructured":"Peng, X., Wei, Y., Deng, A., Wang, D., Hu, D.: Balanced multimodal learning via on-the-fly gradient modulation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8238\u20138247 (2022)","DOI":"10.1109\/CVPR52688.2022.00806"},{"issue":"2","key":"11_CR29","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1109\/JSAIT.2022.3205475","volume":"3","author":"A Reisizadeh","year":"2022","unstructured":"Reisizadeh, A., Tziotis, I., Hassani, H., Mokhtari, A., Pedarsani, R.: Straggler-resilient federated learning: leveraging the interplay between statistical accuracy and system heterogeneity. IEEE J. Sel. Areas Inf. Theory 3(2), 197\u2013205 (2022)","journal-title":"IEEE J. Sel. Areas Inf. Theory"},{"key":"11_CR30","doi-asserted-by":"crossref","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 400\u2013407 (1951)","DOI":"10.1214\/aoms\/1177729586"},{"key":"11_CR31","doi-asserted-by":"crossref","unstructured":"Su, H., Maji, S., Kalogerakis, E., Learned-Miller, E.: Multi-view convolutional neural networks for 3D shape recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 945\u2013953 (2015)","DOI":"10.1109\/ICCV.2015.114"},{"key":"11_CR32","doi-asserted-by":"crossref","unstructured":"Tan, Y., et al.: Fedproto: federated prototype learning across heterogeneous clients. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a036, pp. 8432\u20138440 (2022)","DOI":"10.1609\/aaai.v36i8.20819"},{"key":"11_CR33","doi-asserted-by":"crossref","unstructured":"Tian, Y., Shi, J., Li, B., Duan, Z., Xu, C.: Audio-visual event localization in unconstrained videos. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 247\u2013263 (2018)","DOI":"10.1007\/978-3-030-01216-8_16"},{"key":"11_CR34","doi-asserted-by":"crossref","unstructured":"Wang, H., Li, Y., Xu, W., Li, R., Zhan, Y., Zeng, Z.: DAFKD: domain-aware federated knowledge distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20412\u201320421 (2023)","DOI":"10.1109\/CVPR52729.2023.01955"},{"issue":"8","key":"11_CR35","doi-asserted-by":"publisher","first-page":"2541","DOI":"10.1109\/JSAC.2021.3087272","volume":"39","author":"H Wang","year":"2021","unstructured":"Wang, H., Qu, Z., Guo, S., Wang, N., Li, R., Zhuang, W.: LOSP: overlap synchronization parallel with local compensation for fast distributed training. IEEE J. Sel. Areas Commun. 39(8), 2541\u20132557 (2021)","journal-title":"IEEE J. Sel. Areas Commun."},{"key":"11_CR36","doi-asserted-by":"crossref","unstructured":"Wang, H., Xu, W., Fan, Y., Li, R., Zhou, P.: AOCC-FL: federated learning with aligned overlapping via calibrated compensation. In: IEEE INFOCOM 2023-IEEE Conference on Computer Communications, pp. 1\u201310. IEEE (2023)","DOI":"10.1109\/INFOCOM53939.2023.10229011"},{"key":"11_CR37","doi-asserted-by":"crossref","unstructured":"Wang, W., Tran, D., Feiszli, M.: What makes training multi-modal classification networks hard? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12695\u201312705 (2020)","DOI":"10.1109\/CVPR42600.2020.01271"},{"key":"11_CR38","unstructured":"Wu, N., Jastrzebski, S., Cho, K., Geras, K.J.: Characterizing and overcoming the greedy nature of learning in multi-modal deep neural networks. In: International Conference on Machine Learning, pp. 24043\u201324055. PMLR (2022)"},{"key":"11_CR39","unstructured":"Wu, Z., et al.: 3D shapenets: a deep representation for volumetric shapes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1912\u20131920 (2015)"},{"key":"11_CR40","unstructured":"Xiao, F., Lee, Y.J., Grauman, K., Malik, J., Feichtenhofer, C.: Audiovisual slowfast networks for video recognition. arXiv preprint arXiv:2001.08740 (2020)"},{"key":"11_CR41","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1016\/j.neucom.2022.01.063","volume":"480","author":"B Xiong","year":"2022","unstructured":"Xiong, B., Yang, X., Qi, F., Xu, C.: A unified framework for multi-modal federated learning. Neurocomputing 480, 110\u2013118 (2022)","journal-title":"Neurocomputing"},{"key":"11_CR42","unstructured":"Xu, J., Tong, X., Huang, S.L.: Personalized federated learning with feature alignment and classifier collaboration. arXiv preprint arXiv:2306.11867 (2023)"},{"issue":"2","key":"11_CR43","doi-asserted-by":"publisher","first-page":"1188","DOI":"10.1109\/TWC.2020.3031503","volume":"20","author":"J Xu","year":"2020","unstructured":"Xu, J., Wang, H.: Client selection and bandwidth allocation in wireless federated learning networks: a long-term perspective. IEEE Trans. Wirel. Commun. 20(2), 1188\u20131200 (2020)","journal-title":"IEEE Trans. Wirel. Commun."},{"key":"11_CR44","unstructured":"Yu, Q., Liu, Y., Wang, Y., Xu, K., Liu, J.: Multimodal federated learning via contrastive representation ensemble. arXiv preprint arXiv:2302.08888 (2023)"},{"key":"11_CR45","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Barnaghi, P., Haddadi, H.: Multimodal federated learning on IoT data. In: 2022 IEEE\/ACM Seventh International Conference on Internet-of-Things Design and Implementation (IoTDI), pp. 43\u201354. IEEE (2022)","DOI":"10.1109\/IoTDI54339.2022.00011"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73004-7_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T17:06:57Z","timestamp":1730394417000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73004-7_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9783031730030","9783031730047"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73004-7_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}