{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T15:50:08Z","timestamp":1774021808711,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":95,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSFC","award":["62171276"],"award-info":[{"award-number":["62171276"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["22511106101"],"award-info":[{"award-number":["22511106101"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R&D Program of China","award":["2021ZD0112801"],"award-info":[{"award-number":["2021ZD0112801"]}]},{"name":"Science and Technology Commission of Shanghai Municipal","award":["21511100900"],"award-info":[{"award-number":["21511100900"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671582","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:54:55Z","timestamp":1724561695000},"page":"6137-6147","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":75,"title":["OpenFedLLM: Training Large Language Models on Decentralized Private Data via Federated Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5998-8200","authenticated-orcid":false,"given":"Rui","family":"Ye","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2165-9693","authenticated-orcid":false,"given":"Wenhao","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2959-3274","authenticated-orcid":false,"given":"Jingyi","family":"Chai","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2701-669X","authenticated-orcid":false,"given":"Dihan","family":"Li","sequence":"additional","affiliation":[{"name":"University of Southern California, Los Angeles, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0831-3549","authenticated-orcid":false,"given":"Zexi","family":"Li","sequence":"additional","affiliation":[{"name":"Zhejiang University, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8769-8363","authenticated-orcid":false,"given":"Yinda","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3182-1409","authenticated-orcid":false,"given":"Yaxin","family":"Du","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8882-6311","authenticated-orcid":false,"given":"Yanfeng","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University &amp; Shanghai AI Laboratory, Shanghai, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6199-529X","authenticated-orcid":false,"given":"Siheng","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University &amp; Shanghai AI Laboratory, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Yuntao Bai Andy Jones Kamal Ndousse Amanda Askell Anna Chen Nova DasSarma Dawn Drain Stanislav Fort Deep Ganguli Tom Henighan et al. 2022. Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:2204.05862 (2022)."},{"key":"e_1_3_2_1_2_1","unstructured":"Yuntao Bai Saurav Kadavath Sandipan Kundu Amanda Askell Jackson Kernion Andy Jones Anna Chen Anna Goldie Azalia Mirhoseini Cameron McKinnon et al. 2022. Constitutional ai: Harmlessness from ai feedback. arXiv preprint arXiv:2212.08073 (2022)."},{"key":"e_1_3_2_1_3_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. 2020. Language models are few-shot learners. NIPS, Vol. 33 (2020), 1877--1901.","journal-title":"NIPS"},{"key":"e_1_3_2_1_4_1","volume-title":"Federated large language model: A position paper. arXiv preprint arXiv:2307.08925","author":"Chen Chaochao","year":"2023","unstructured":"Chaochao Chen, Xiaohua Feng, Jun Zhou, Jianwei Yin, and Xiaolin Zheng. 2023. Federated large language model: A position paper. arXiv preprint arXiv:2307.08925 (2023)."},{"key":"e_1_3_2_1_5_1","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique Ponde de Oliveira Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman Alex Ray Raul Puri Gretchen Krueger Michael Petrov Heidy Khlaaf Girish Sastry Pamela Mishkin Brooke Chan Scott Gray Nick Ryder Mikhail Pavlov Alethea Power Lukasz Kaiser Mohammad Bavarian Clemens Winter Philippe Tillet Felipe Petroski Such Dave Cummings Matthias Plappert Fotios Chantzis Elizabeth Barnes Ariel Herbert-Voss William Hebgen Guss Alex Nichol Alex Paino Nikolas Tezak Jie Tang Igor Babuschkin Suchir Balaji Shantanu Jain William Saunders Christopher Hesse Andrew N. Carr Jan Leike Josh Achiam Vedant Misra Evan Morikawa Alec Radford Matthew Knight Miles Brundage Mira Murati Katie Mayer Peter Welinder Bob McGrew Dario Amodei Sam McCandlish Ilya Sutskever and Wojciech Zaremba. 2021. Evaluating Large Language Models Trained on Code. arxiv: 2107.03374 [cs.LG]"},{"key":"e_1_3_2_1_6_1","volume-title":"Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174","author":"Chen Tianqi","year":"2016","unstructured":"Tianqi Chen, Bing Xu, Chiyuan Zhang, and Carlos Guestrin. 2016. Training deep nets with sublinear memory cost. arXiv preprint arXiv:1604.06174 (2016)."},{"key":"e_1_3_2_1_7_1","volume-title":"Agentverse: Facilitating multi-agent collaboration and exploring emergent behaviors in agents. arXiv preprint arXiv:2308.10848","author":"Chen Weize","year":"2023","unstructured":"Weize Chen, Yusheng Su, Jingwei Zuo, Cheng Yang, Chenfei Yuan, Chen Qian, Chi-Min Chan, Yujia Qin, Yaxi Lu, Ruobing Xie, et al. 2023. Agentverse: Facilitating multi-agent collaboration and exploring emergent behaviors in agents. 
arXiv preprint arXiv:2308.10848 (2023)."},{"key":"e_1_3_2_1_8_1","volume-title":"INSTRUCTEVAL: Towards Holistic Evaluation of Instruction-Tuned Large Language Models. arXiv preprint arXiv:2306.04757","author":"Chia Yew Ken","year":"2023","unstructured":"Yew Ken Chia, Pengfei Hong, Lidong Bing, and Soujanya Poria. 2023. INSTRUCTEVAL: Towards Holistic Evaluation of Instruction-Tuned Large Language Models. arXiv preprint arXiv:2306.04757 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E Gonzalez, et al. 2023. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed 14 April 2023) (2023)."},{"key":"e_1_3_2_1_10_1","volume-title":"Client selection in federated learning: Convergence analysis and power-of-choice selection strategies. arXiv preprint arXiv:2010.01243","author":"Cho Yae Jee","year":"2020","unstructured":"Yae Jee Cho, Jianyu Wang, and Gauri Joshi. 2020. Client selection in federated learning: Convergence analysis and power-of-choice selection strategies. arXiv preprint arXiv:2010.01243 (2020)."},{"key":"e_1_3_2_1_11_1","volume-title":"Charles Sutton, Sebastian Gehrmann, et al.","author":"Chowdhery Aakanksha","year":"2022","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al. 2022. Palm: Scaling language modeling with pathways. arXiv preprint arXiv:2204.02311 (2022)."},{"key":"e_1_3_2_1_12_1","volume-title":"Deep reinforcement learning from human preferences. Advances in neural information processing systems","author":"Christiano Paul F","year":"2017","unstructured":"Paul F Christiano, Jan Leike, Tom Brown, Miljan Martic, Shane Legg, and Dario Amodei. 2017. Deep reinforcement learning from human preferences. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_13_1","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano et al. 2021. Training verifiers to solve math word problems. arXiv preprint arXiv:2110.14168 (2021)."},{"key":"e_1_3_2_1_14_1","unstructured":"Ganqu Cui Lifan Yuan Ning Ding Guanming Yao Wei Zhu Yuan Ni Guotong Xie Zhiyuan Liu and Maosong Sun. 2023. UltraFeedback: Boosting Language Models with High-quality Feedback. arxiv: 2310.01377 [cs.CL]"},{"key":"e_1_3_2_1_15_1","volume-title":"Enhancing Chat Language Models by Scaling High-quality Instructional Conversations. arXiv preprint arXiv:2305.14233","author":"Ding Ning","year":"2023","unstructured":"Ning Ding, Yulin Chen, Bokai Xu, Yujia Qin, Zhi Zheng, Shengding Hu, Zhiyuan Liu, Maosong Sun, and Bowen Zhou. 2023. Enhancing Chat Language Models by Scaling High-quality Instructional Conversations. arXiv preprint arXiv:2305.14233 (2023)."},{"key":"e_1_3_2_1_16_1","volume-title":"Proc. of NAACL.","author":"Dua Dheeru","year":"2019","unstructured":"Dheeru Dua, Yizhong Wang, Pradeep Dasigi, Gabriel Stanovsky, Sameer Singh, and Matt Gardner. 2019. DROP: A Reading Comprehension Benchmark Requiring Discrete Reasoning Over Paragraphs. In Proc. 
of NAACL."},{"key":"e_1_3_2_1_17_1","volume-title":"Fate-llm: A industrial grade federated learning framework for large language models. arXiv preprint arXiv:2310.10049","author":"Fan Tao","year":"2023","unstructured":"Tao Fan, Yan Kang, Guoqiang Ma, Weijing Chen, Wenbin Wei, Lixin Fan, and Qiang Yang. 2023. Fate-llm: A industrial grade federated learning framework for large language models. arXiv preprint arXiv:2310.10049 (2023)."},{"key":"e_1_3_2_1_18_1","volume-title":"Yuhan Liu, and Yulia Tsvetkov.","author":"Feng Shangbin","year":"2023","unstructured":"Shangbin Feng, Chan Young Park, Yuhan Liu, and Yulia Tsvetkov. 2023. From Pretraining Data to Language Models to Downstream Tasks: Tracking the Trails of Political Biases Leading to Unfair NLP Models. arXiv preprint arXiv:2305.08283 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the Thirteenth Language Resources and Evaluation Conference. 2126--2140","author":"Frohberg J\u00f6rg","year":"2022","unstructured":"J\u00f6rg Frohberg and Frank Binder. 2022. CRASS: A Novel Data Set and Benchmark to Test Counterfactual Reasoning of Large Language Models. In Proceedings of the Thirteenth Language Resources and Evaluation Conference. 2126--2140."},{"key":"e_1_3_2_1_20_1","unstructured":"Deep Ganguli Liane Lovitt Jackson Kernion Amanda Askell Yuntao Bai Saurav Kadavath Ben Mann Ethan Perez Nicholas Schiefer Kamal Ndousse et al. 2022. Red teaming language models to reduce harms: Methods scaling behaviors and lessons learned. arXiv preprint arXiv:2209.07858 (2022)."},{"key":"e_1_3_2_1_21_1","unstructured":"Leo Gao Stella Biderman Sid Black Laurence Golding Travis Hoppe Charles Foster Jason Phang Horace He Anish Thite Noa Nabeshima et al. 2020. The pile: An 800gb dataset of diverse text for language modeling. arXiv preprint arXiv:2101.00027 (2020)."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2021. Measuring Massive Multitask Language Understanding. Proceedings of the International Conference on Learning Representations (ICLR) (2021)."},{"key":"e_1_3_2_1_23_1","volume-title":"Measuring the effects of non-identical data distribution for federated visual classification. arXiv preprint arXiv:1909.06335","author":"Harry Hsu Tzu-Ming","year":"2019","unstructured":"Tzu-Ming Harry Hsu, Hang Qi, and Matthew Brown. 2019. Measuring the effects of non-identical data distribution for federated visual classification. arXiv preprint arXiv:1909.06335 (2019)."},{"key":"e_1_3_2_1_24_1","unstructured":"Edward J Hu Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang Weizhu Chen et al. 2021. LoRA: Low-Rank Adaptation of Large Language Models. In ICLR."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-industry.4"},{"key":"e_1_3_2_1_26_1","unstructured":"FedML Inc. 2023. Federated Learning on Large Language Models (LLMs). https:\/\/doc.fedml.ai\/federate\/fedllm. Accessed: 2024-03--31."},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"14729","author":"Jang Joel","year":"2023","unstructured":"Joel Jang, Seungone Kim, Seonghyeon Ye, Doyoung Kim, Lajanugen Logeswaran, Moontae Lee, Kyungjae Lee, and Minjoon Seo. 2023. 
Exploring the Benefits of Training Expert Language Models over Instruction Tuning. In Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 14702--14729. https:\/\/proceedings.mlr.press\/v202\/jang23a.html"},{"key":"e_1_3_2_1_28_1","unstructured":"Jiaming Ji Tianyi Qiu Boyuan Chen Borong Zhang Hantao Lou Kaile Wang Yawen Duan Zhonghao He Jiayi Zhou Zhaowei Zhang et al. 2023. AI Alignment: A Comprehensive Survey. arXiv preprint arXiv:2310.19852 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Kallista Bonawitz, Zachary Charles, Graham Cormode, Rachel Cummings, et al.","author":"Kairouz Peter","year":"2021","unstructured":"Peter Kairouz, H Brendan McMahan, Brendan Avent, Aur\u00e9lien Bellet, Mehdi Bennis, Arjun Nitin Bhagoji, Kallista Bonawitz, Zachary Charles, Graham Cormode, Rachel Cummings, et al. 2021. Advances and open problems in federated learning. Foundations and Trends\u00ae in Machine Learning, Vol. 14, 1--2 (2021), 1--210."},{"key":"e_1_3_2_1_31_1","volume-title":"Scaling laws for neural language models. arXiv preprint arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. 2020. Scaling laws for neural language models. arXiv preprint arXiv:2001.08361 (2020)."},{"key":"e_1_3_2_1_32_1","first-page":"28663","article-title":"Breaking the centralized barrier for cross-device federated learning","volume":"34","author":"Karimireddy Sai Praneeth","year":"2021","unstructured":"Sai Praneeth Karimireddy, Martin Jaggi, Satyen Kale, Mehryar Mohri, Sashank Reddi, Sebastian U Stich, and Ananda Theertha Suresh. 2021. Breaking the centralized barrier for cross-device federated learning. Advances in Neural Information Processing Systems, Vol. 34 (2021), 28663--28676.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_33_1","volume-title":"International Conference on Machine Learning. PMLR, 5132--5143","author":"Karimireddy Sai Praneeth","year":"2020","unstructured":"Sai Praneeth Karimireddy, Satyen Kale, Mehryar Mohri, Sashank Reddi, Sebastian Stich, and Ananda Theertha Suresh. 2020. Scaffold: Stochastic controlled averaging for federated learning. In International Conference on Machine Learning. PMLR, 5132--5143."},{"key":"e_1_3_2_1_34_1","volume-title":"The Past, Present and Better Future of Feedback Learning in Large Language Models for Subjective Human Preferences and Values. arXiv preprint arXiv:2310.07629","author":"Kirk Hannah Rose","year":"2023","unstructured":"Hannah Rose Kirk, Andrew M Bean, Bertie Vidgen, Paul R\u00f6ttger, and Scott A Hale. 2023. The Past, Present and Better Future of Feedback Learning in Large Language Models for Subjective Human Preferences and Values. 
arXiv preprint arXiv:2310.07629 (2023)."},{"key":"e_1_3_2_1_35_1","first-page":"22199","article-title":"Large language models are zero-shot reasoners","volume":"35","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang Shane Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large language models are zero-shot reasoners. NIPS, Vol. 35 (2022), 22199--22213.","journal-title":"NIPS"},{"key":"e_1_3_2_1_36_1","volume-title":"FederatedScope-LLM: A comprehensive package for fine-tuning large language models in federated learning. arXiv preprint arXiv:2309.00363","author":"Kuang Weirui","year":"2023","unstructured":"Weirui Kuang, Bingchen Qian, Zitao Li, Daoyuan Chen, Dawei Gao, Xuchen Pan, Yuexiang Xie, Yaliang Li, Bolin Ding, and Jingren Zhou. 2023. FederatedScope-LLM: A comprehensive package for fine-tuning large language models in federated learning. arXiv preprint arXiv:2309.00363 (2023)."},{"key":"e_1_3_2_1_37_1","volume-title":"Camille Elepa no, Maria Madriaga, Rimel Aggabao, Giezel Diaz-Candido, James Maningo, et al.","author":"Kung Tiffany H","year":"2023","unstructured":"Tiffany H Kung, Morgan Cheatham, Arielle Medenilla, Czarina Sillos, Lorie De Leon, Camille Elepa no, Maria Madriaga, Rimel Aggabao, Giezel Diaz-Candido, James Maningo, et al. 2023. Performance of ChatGPT on USMLE: Potential for AI-assisted medical education using large language models. PLoS digital health, Vol. 2, 2 (2023), e0000198."},{"key":"e_1_3_2_1_38_1","volume-title":"Beyond Scale: the Diversity Coefficient as a Data Quality Metric Demonstrates LLMs are Pre-trained on Formally Diverse Data. arXiv preprint arXiv:2306.13840","author":"Lee Alycia","year":"2023","unstructured":"Alycia Lee, Brando Miranda, and Sanmi Koyejo. 2023. Beyond Scale: the Diversity Coefficient as a Data Quality Metric Demonstrates LLMs are Pre-trained on Formally Diverse Data. arXiv preprint arXiv:2306.13840 (2023)."},{"key":"e_1_3_2_1_39_1","volume-title":"RLAIF: Scaling Reinforcement Learning from Human Feedback with AI Feedback. arXiv preprint arXiv:2309.00267","author":"Lee Harrison","year":"2023","unstructured":"Harrison Lee, Samrat Phatale, Hassan Mansoor, Kellie Lu, Thomas Mesnard, Colton Bishop, Victor Carbune, and Abhinav Rastogi. 2023. RLAIF: Scaling Reinforcement Learning from Human Feedback with AI Feedback. arXiv preprint arXiv:2309.00267 (2023)."},{"key":"e_1_3_2_1_40_1","volume-title":"From quantity to quality: Boosting llm performance with self-guided data selection for instruction tuning. arXiv preprint arXiv:2308.12032","author":"Li Ming","year":"2023","unstructured":"Ming Li, Yong Zhang, Zhitao Li, Jiuhai Chen, Lichang Chen, Ning Cheng, Jianzong Wang, Tianyi Zhou, and Jing Xiao. 2023. From quantity to quality: Boosting llm performance with self-guided data selection for instruction tuning. arXiv preprint arXiv:2308.12032 (2023)."},{"key":"e_1_3_2_1_41_1","first-page":"429","article-title":"Federated optimization in heterogeneous networks","volume":"2","author":"Li Tian","year":"2020","unstructured":"Tian Li, Anit Kumar Sahu, Manzil Zaheer, Maziar Sanjabi, Ameet Talwalkar, and Virginia Smith. 2020. Federated optimization in heterogeneous networks. Proceedings of Machine Learning and Systems, Vol. 2 (2020), 429--450.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_42_1","volume-title":"Fair Resource Allocation in Federated Learning. 
In International Conference on Learning Representations.","author":"Li Tian","year":"2020","unstructured":"Tian Li, Maziar Sanjabi, Ahmad Beirami, and Virginia Smith. 2020. Fair Resource Allocation in Federated Learning. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_43_1","volume-title":"On the Convergence of FedAvg on Non-IID Data. In International Conference on Learning Representations.","author":"Li Xiang","year":"2019","unstructured":"Xiang Li, Kaixuan Huang, Wenhao Yang, Shusen Wang, and Zhihua Zhang. 2019. On the Convergence of FedAvg on Non-IID Data. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"e_1_3_2_1_45_1","volume-title":"Revisiting weighted aggregation in federated learning with neural networks. arXiv preprint arXiv:2302.10911","author":"Li Zexi","year":"2023","unstructured":"Zexi Li, Tao Lin, Xinyi Shang, and Chao Wu. 2023. Revisiting weighted aggregation in federated learning with neural networks. arXiv preprint arXiv:2302.10911 (2023)."},{"key":"e_1_3_2_1_46_1","first-page":"1950","article-title":"Few-shot parameter-efficient fine-tuning is better and cheaper than in-context learning","volume":"35","author":"Liu Haokun","year":"2022","unstructured":"Haokun Liu, Derek Tam, Mohammed Muqeeth, Jay Mohta, Tenghao Huang, Mohit Bansal, and Colin A Raffel. 2022. Few-shot parameter-efficient fine-tuning is better and cheaper than in-context learning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 1950--1965.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"22648","author":"Longpre Shayne","year":"2023","unstructured":"Shayne Longpre, Le Hou, Tu Vu, Albert Webson, Hyung Won Chung, Yi Tay, Denny Zhou, Quoc V Le, Barret Zoph, Jason Wei, and Adam Roberts. 2023. The Flan Collection: Designing Data and Methods for Effective Instruction Tuning. In Proceedings of the 40th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 202). PMLR, 22631--22648."},{"key":"e_1_3_2_1_49_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations.","author":"Loshchilov Ilya","year":"2018","unstructured":"Ilya Loshchilov and Frank Hutter. 2018. Decoupled Weight Decay Regularization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_50_1","volume-title":"Twitter financial news sentiment. https:\/\/huggingface.co\/datasets\/zeroshot\/twitter-financial-news-sentiment","author":"Magic Neural","year":"2022","unstructured":"Neural Magic. 2022. Twitter financial news sentiment. https:\/\/huggingface.co\/datasets\/zeroshot\/twitter-financial-news-sentiment (2022)."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3184558.3192301"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.23062"},{"key":"e_1_3_2_1_53_1","unstructured":"Brendan McMahan Eider Moore Daniel Ramage Seth Hampson and Blaise Aguera y Arcas. 2017. Communication-efficient learning of deep networks from decentralized data. In Artificial intelligence and statistics. 
PMLR, 1273--1282."},{"key":"e_1_3_2_1_54_1","volume-title":"Orca: Progressive learning from complex explanation traces of gpt-4. arXiv preprint arXiv:2306.02707","author":"Mukherjee Subhabrata","year":"2023","unstructured":"Subhabrata Mukherjee, Arindam Mitra, Ganesh Jawahar, Sahaj Agarwal, Hamid Palangi, and Ahmed Awadallah. 2023. Orca: Progressive learning from complex explanation traces of gpt-4. arXiv preprint arXiv:2306.02707 (2023)."},{"key":"e_1_3_2_1_56_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et al. 2022. Training language models to follow instructions with human feedback. NIPS, Vol. 35 (2022), 27730--27744.","journal-title":"NIPS"},{"key":"e_1_3_2_1_57_1","volume-title":"Instruction Tuning with GPT-4. arXiv preprint arXiv:2304.03277","author":"Peng Baolin","year":"2023","unstructured":"Baolin Peng, Chunyuan Li, Pengcheng He, Michel Galley, and Jianfeng Gao. 2023. Instruction Tuning with GPT-4. arXiv preprint arXiv:2304.03277 (2023)."},{"key":"e_1_3_2_1_58_1","volume-title":"Direct preference optimization: Your language model is secretly a reward model. arXiv preprint arXiv:2305.18290","author":"Rafailov Rafael","year":"2023","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Stefano Ermon, Christopher D Manning, and Chelsea Finn. 2023. Direct preference optimization: Your language model is secretly a reward model. arXiv preprint arXiv:2305.18290 (2023)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_60_1","volume-title":"Adaptive Federated Optimization. In International Conference on Learning Representations.","author":"Reddi Sashank J","year":"2020","unstructured":"Sashank J Reddi, Zachary Charles, Manzil Zaheer, Zachary Garrett, Keith Rush, Jakub Kone\u010dn\u00fd, Sanjiv Kumar, and Hugh Brendan McMahan. 2020. Adaptive Federated Optimization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_61_1","volume-title":"Yossi Adi, Jingyu Liu, Tal Remez, J\u00e9r\u00e9my Rapin, et al.","author":"Roziere Baptiste","year":"2023","unstructured":"Baptiste Roziere, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, J\u00e9r\u00e9my Rapin, et al. 2023. Code llama: Open foundation models for code. arXiv preprint arXiv:2308.12950 (2023)."},{"key":"e_1_3_2_1_62_1","unstructured":"Victor Sanh Albert Webson Colin Raffel Stephen Bach Lintang Sutawika Zaid Alyafeai Antoine Chaffin Arnaud Stiegler Arun Raja Manan Dey et al. 2021. Multitask Prompted Training Enables Zero-Shot Task Generalization. In ICLR."},{"key":"e_1_3_2_1_63_1","volume-title":"Clustered federated learning: Model-agnostic distributed multitask optimization under privacy constraints","author":"Sattler Felix","year":"2020","unstructured":"Felix Sattler, Klaus-Robert M\u00fcller, and Wojciech Samek. 2020. Clustered federated learning: Model-agnostic distributed multitask optimization under privacy constraints. IEEE transactions on neural networks and learning systems, Vol. 32, 8 (2020), 3710--3722."},
{"key":"e_1_3_2_1_64_1","volume-title":"Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al.","author":"Scao Teven Le","year":"2022","unstructured":"Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra Sasha Luccioni, Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al. 2022. Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:2211.05100 (2022)."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_66_1","unstructured":"Karan Singhal Tao Tu Juraj Gottweis Rory Sayres Ellery Wulczyn Le Hou Kevin Clark Stephen Pfohl Heather Cole-Lewis Darlene Neal et al. 2023. Towards expert-level medical question answering with large language models. arXiv preprint arXiv:2305.09617 (2023)."},{"key":"e_1_3_2_1_67_1","volume-title":"Abubakar Abid, Adam Fisch, Adam R Brown, Adam Santoro, Aditya Gupta, Adri\u00e0 Garriga-Alonso, et al.","author":"Srivastava Aarohi","year":"2023","unstructured":"Aarohi Srivastava, Abhinav Rastogi, Abhishek Rao, Abu Awal Md Shoeb, Abubakar Abid, Adam Fisch, Adam R Brown, Adam Santoro, Aditya Gupta, Adri\u00e0 Garriga-Alonso, et al. 2023. Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models. Transactions on Machine Learning Research (2023)."},{"key":"e_1_3_2_1_68_1","volume-title":"Aakanksha Chowdhery, Quoc V Le, Ed H Chi, Denny Zhou, and Jason Wei.","author":"Suzgun Mirac","year":"2022","unstructured":"Mirac Suzgun, Nathan Scales, Nathanael Sch\u00e4rli, Sebastian Gehrmann, Yi Tay, Hyung Won Chung, Aakanksha Chowdhery, Quoc V Le, Ed H Chi, Denny Zhou, and Jason Wei. 2022. Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them. arXiv preprint arXiv:2210.09261 (2022)."},{"key":"e_1_3_2_1_69_1","volume-title":"Hashimoto","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori B. Hashimoto. 2023. Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca."},{"key":"e_1_3_2_1_70_1","volume-title":"Kabilan Elangovan, Laura Gutierrez, Ting Fang Tan, and Daniel Shu Wei Ting.","author":"Thirunavukarasu Arun James","year":"2023","unstructured":"Arun James Thirunavukarasu, Darren Shu Jeng Ting, Kabilan Elangovan, Laura Gutierrez, Ting Fang Tan, and Daniel Shu Wei Ting. 2023. Large language models in medicine. Nature medicine, Vol. 29, 8 (2023), 1930--1940."},{"key":"e_1_3_2_1_71_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_72_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_73_1","volume-title":"Zephyr: Direct distillation of lm alignment. 
arXiv preprint arXiv:2310.16944","author":"Tunstall Lewis","year":"2023","unstructured":"Lewis Tunstall, Edward Beeching, Nathan Lambert, Nazneen Rajani, Kashif Rasul, Younes Belkada, Shengyi Huang, Leandro von Werra, Cl\u00e9mentine Fourrier, Nathan Habib, et al. 2023. Zephyr: Direct distillation of lm alignment. arXiv preprint arXiv:2310.16944 (2023)."},{"key":"e_1_3_2_1_74_1","volume-title":"Will we run out of data? An analysis of the limits of scaling datasets in Machine Learning. arXiv preprint arXiv:2211.04325","author":"Villalobos Pablo","year":"2022","unstructured":"Pablo Villalobos, Jaime Sevilla, Lennart Heim, Tamay Besiroglu, Marius Hobbhahn, and Anson Ho. 2022. Will we run out of data? An analysis of the limits of scaling datasets in Machine Learning. arXiv preprint arXiv:2211.04325 (2022)."},{"key":"e_1_3_2_1_75_1","volume-title":"Voyager: An open-ended embodied agent with large language models. arXiv preprint arXiv:2305.16291","author":"Wang Guanzhi","year":"2023","unstructured":"Guanzhi Wang, Yuqi Xie, Yunfan Jiang, Ajay Mandlekar, Chaowei Xiao, Yuke Zhu, Linxi Fan, and Anima Anandkumar. 2023. Voyager: An open-ended embodied agent with large language models. arXiv preprint arXiv:2305.16291 (2023)."},{"key":"e_1_3_2_1_76_1","volume-title":"Tackling the objective inconsistency problem in heterogeneous federated optimization. Advances in neural information processing systems","author":"Wang Jianyu","year":"2020","unstructured":"Jianyu Wang, Qinghua Liu, Hao Liang, Gauri Joshi, and H Vincent Poor. 2020. Tackling the objective inconsistency problem in heterogeneous federated optimization. Advances in neural information processing systems, Vol. 33 (2020), 7611--7623."},{"key":"e_1_3_2_1_77_1","volume-title":"David Wadden","author":"Wang Yizhong","year":"2023","unstructured":"Yizhong Wang, Hamish Ivison, Pradeep Dasigi, Jack Hessel, Tushar Khot, Khyathi Raghavi Chandu, David Wadden, Kelsey MacMillan, Noah A Smith, Iz Beltagy, et al. 2023. How Far Can Camels Go? Exploring the State of Instruction Tuning on Open Resources. arXiv preprint arXiv:2306.04751 (2023)."},{"key":"e_1_3_2_1_78_1","volume-title":"Self-instruct: Aligning language model with self generated instructions. arXiv preprint arXiv:2212.10560","author":"Wang Yizhong","year":"2022","unstructured":"Yizhong Wang, Yeganeh Kordi, Swaroop Mishra, Alisa Liu, Noah A Smith, Daniel Khashabi, and Hannaneh Hajishirzi. 2022. Self-instruct: Aligning language model with self generated instructions. arXiv preprint arXiv:2212.10560 (2022)."},{"key":"e_1_3_2_1_79_1","volume-title":"Data Management For Large Language Models: A Survey. arXiv preprint arXiv:2312.01700","author":"Wang Zige","year":"2023","unstructured":"Zige Wang, Wanjun Zhong, Yufei Wang, Qi Zhu, Fei Mi, Baojun Wang, Lifeng Shang, Xin Jiang, and Qun Liu. 2023. Data Management For Large Language Models: A Survey. arXiv preprint arXiv:2312.01700 (2023)."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41562-023-01659-w"},{"key":"e_1_3_2_1_81_1","volume-title":"Brian Lester, Nan Du, Andrew M Dai, and Quoc V Le.","author":"Wei Jason","year":"2021","unstructured":"Jason Wei, Maarten Bosma, Vincent Zhao, Kelvin Guu, Adams Wei Yu, Brian Lester, Nan Du, Andrew M Dai, and Quoc V Le. 2021. Finetuned Language Models are Zero-Shot Learners. 
In ICLR."},{"key":"e_1_3_2_1_82_1","first-page":"24824","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. NIPS, Vol. 35 (2022), 24824--24837.","journal-title":"NIPS"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-emnlp.210"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"crossref","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz et al. 2019. Huggingface's transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019).","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_85_1","volume-title":"Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564","author":"Wu Shijie","year":"2023","unstructured":"Shijie Wu, Ozan Irsoy, Steven Lu, Vadim Dabravolski, Mark Dredze, Sebastian Gehrmann, Prabhanjan Kambadur, David Rosenberg, and Gideon Mann. 2023. Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564 (2023)."},{"key":"e_1_3_2_1_86_1","volume-title":"Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244","author":"Xu Can","year":"2023","unstructured":"Can Xu, Qingfeng Sun, Kai Zheng, Xiubo Geng, Pu Zhao, Jiazhan Feng, Chongyang Tao, and Daxin Jiang. 2023. Wizardlm: Empowering large language models to follow complex instructions. arXiv preprint arXiv:2304.12244 (2023)."},{"key":"e_1_3_2_1_87_1","unstructured":"Hongyang Yang. 2023. Data-Centric FinGPT. Open-source for open finance. https:\/\/github.com\/AI4Finance-Foundation\/FinGPT."},{"key":"e_1_3_2_1_88_1","volume-title":"Fake It Till Make It: Federated Learning with Consensus-Oriented Generation. arXiv preprint arXiv:2312.05966","author":"Ye Rui","year":"2023","unstructured":"Rui Ye, Yaxin Du, Zhenyang Ni, Siheng Chen, and Yanfeng Wang. 2023. Fake It Till Make It: Federated Learning with Consensus-Oriented Generation. arXiv preprint arXiv:2312.05966 (2023)."},{"key":"e_1_3_2_1_89_1","volume-title":"International Conference on Machine Learning. PMLR, 39801--39817","author":"Ye Rui","year":"2023","unstructured":"Rui Ye, Zhenyang Ni, Fangzhao Wu, Siheng Chen, and Yanfeng Wang. 2023. Personalized federated learning with inferred collaboration graphs. In International Conference on Machine Learning. PMLR, 39801--39817."},{"key":"e_1_3_2_1_90_1","volume-title":"FedDisco: Federated Learning with Discrepancy-Aware Collaboration. arXiv preprint arXiv:2305.19229","author":"Ye Rui","year":"2023","unstructured":"Rui Ye, Mingkai Xu, Jianyu Wang, Chenxin Xu, Siheng Chen, and Yanfeng Wang. 2023. FedDisco: Federated Learning with Discrepancy-Aware Collaboration. arXiv preprint arXiv:2305.19229 (2023)."},{"key":"e_1_3_2_1_91_1","volume-title":"Mammoth: Building math generalist models through hybrid instruction tuning. arXiv preprint arXiv:2309.05653","author":"Yue Xiang","year":"2023","unstructured":"Xiang Yue, Xingwei Qu, Ge Zhang, Yao Fu, Wenhao Huang, Huan Sun, Yu Su, and Wenhu Chen. 2023. Mammoth: Building math generalist models through hybrid instruction tuning. 
arXiv preprint arXiv:2309.05653 (2023)."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.4489831"},{"key":"e_1_3_2_1_93_1","volume-title":"Towards Building the Federated GPT: Federated Instruction Tuning. arXiv preprint arXiv:2305.05644","author":"Zhang Jianyi","year":"2023","unstructured":"Jianyi Zhang, Saeed Vahidian, Martin Kuo, Chunyuan Li, Ruiyi Zhang, Guoyin Wang, and Yiran Chen. 2023. Towards Building the Federated GPT: Federated Instruction Tuning. arXiv preprint arXiv:2305.05644 (2023)."},{"key":"e_1_3_2_1_94_1","unstructured":"Lianmin Zheng Wei-Lin Chiang Ying Sheng Siyuan Zhuang Zhanghao Wu Yonghao Zhuang Zi Lin Zhuohan Li Dacheng Li Eric. P Xing Hao Zhang Joseph E. Gonzalez and Ion Stoica. 2023. Judging LLM-as-a-judge with MT-Bench and Chatbot Arena. arxiv: 2306.05685 [cs.CL]"},{"key":"e_1_3_2_1_95_1","volume-title":"Lima: Less is more for alignment. arXiv preprint arXiv:2305.11206","author":"Zhou Chunting","year":"2023","unstructured":"Chunting Zhou, Pengfei Liu, Puxin Xu, Srini Iyer, Jiao Sun, Yuning Mao, Xuezhe Ma, Avia Efrat, Ping Yu, Lili Yu, et al. 2023. Lima: Less is more for alignment. arXiv preprint arXiv:2305.11206 (2023)."},{"key":"e_1_3_2_1_96_1","volume-title":"Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043","author":"Zou Andy","year":"2023","unstructured":"Andy Zou, Zifan Wang, J Zico Kolter, and Matt Fredrikson. 2023. Universal and transferable adversarial attacks on aligned language models. arXiv preprint arXiv:2307.15043 (2023)."}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671582","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671582","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:19Z","timestamp":1750291459000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671582"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":95,"alternative-id":["10.1145\/3637528.3671582","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671582","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}