{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T12:10:02Z","timestamp":1750939802428,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":34,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819615278"},{"type":"electronic","value":"9789819615285"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-1528-5_13","type":"book-chapter","created":{"date-parts":[[2025,2,14]],"date-time":"2025-02-14T17:23:25Z","timestamp":1739553805000},"page":"185-203","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["FEDNPAIT: Federated Learning with\u00a0NADAM and\u00a0PADAM for\u00a0Instruction Tuning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0563-6396","authenticated-orcid":false,"given":"Zhipeng","family":"Gao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4068-6960","authenticated-orcid":false,"given":"Yichen","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3404-5373","authenticated-orcid":false,"given":"Xinlei","family":"Yu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,15]]},"reference":[{"key":"13_CR1","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR2","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I., et al.: Improving language understanding by generative pre-training. OpenAI (2018)"},{"key":"13_CR3","unstructured":"Touvron, H., et al.: Llama 2: open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)"},{"issue":"240","key":"13_CR4","first-page":"1","volume":"24","author":"H Touvron","year":"2023","unstructured":"Touvron, H., Martin, L., Stone, K., Albert, P., Almahairi, A., Babaei, Y.: Palm: scaling language modeling with pathways. J. Mach. Learn. Res. 24(240), 1\u2013113 (2023)","journal-title":"J. Mach. Learn. Res."},{"key":"13_CR5","unstructured":"Li, Z., Haroutunian, L., Tumuluri, R., Cohen, P., Haffari, G.: Improving cross-domain low-resource text generation through LLM post-editing: a programmer-interpreter approach. arXiv preprint arXiv:2402.04609 (2024)"},{"key":"13_CR6","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"13_CR7","unstructured":"Zhang, S., et al.: Instruction tuning for large language models: a survey. arXiv preprint arXiv:2308.10792 (2023)"},{"key":"13_CR8","unstructured":"Xia, M., Malladi, S., Gururangan, S., Arora, S., Chen, D.: Less: selecting influential data for targeted instruction tuning. arXiv preprint arXiv:2402.04333 (2024)"},{"issue":"1","key":"13_CR9","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1257\/pol.20210309","volume":"16","author":"SG Goldberg","year":"2024","unstructured":"Goldberg, S.G., Johnson, G.A., Shriver, S.K.: Regulating privacy online: an economic evaluation of the GDPR. Am. Econ. J. Econ. Pol. 16(1), 325\u2013358 (2024)","journal-title":"Am. Econ. J. Econ. Pol."},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Yao, Y., Duan, J., Xu, K., Cai, Y., Sun, Z., Zhang, Y.: A survey on large language model (LLM) security and privacy: the good, the bad, and the ugly. High-Conf. Comput. 100211 (2024)","DOI":"10.1016\/j.hcc.2024.100211"},{"key":"13_CR11","unstructured":"Kone\u010dn\u00fd, J., McMahan, H.B., Yu, F.X., Richt\u00e1rik, P., Suresh, A.T., Bacon, D.: Federated learning: strategies for improving communication efficiency. arXiv preprint arXiv:1610.05492 (2016)"},{"key":"13_CR12","unstructured":"Karimireddy, S.P., Kale, S., Mohri, M., Reddi, S.J., Stich, S.U., Suresh, A.T.: Scaffold: stochastic controlled averaging for on-device federated learning, 2(6) (2019). arXiv preprint arXiv:1910.06378"},{"key":"13_CR13","unstructured":"Reddi, S., et al.: Adaptive federated optimization. arXiv preprint arXiv:2003.00295 (2020)"},{"key":"13_CR14","unstructured":"Dozat, T.: Incorporating nesterov momentum into adam (2016)"},{"key":"13_CR15","unstructured":"Chen, J., Zhou, D., Tang, Y., Yang, Z., Cao, Y., Gu, Q.: Closing the generalization gap of adaptive gradient methods in training deep neural networks. arXiv preprint arXiv:1806.06763 (2018)"},{"key":"13_CR16","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"13_CR17","unstructured":"Wu, J., Gan, W., Chen, Z., Wan, S., Lin, H.: AI-generated content (AIGC): a survey. arXiv preprint arXiv:2304.06632 (2023)"},{"issue":"8","key":"13_CR18","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"key":"13_CR19","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, N.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"13_CR20","unstructured":"Devlin, J., Chang, M. W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"13_CR21","unstructured":"Wei, J., et al.: Finetuned language models are zero-shot learners. arXiv preprint arXiv:2109.01652 (2021)"},{"key":"13_CR22","unstructured":"Kaplan, J., et al.: Scaling laws for neural language models. arXiv preprint arXiv:2001.08361 (2020)"},{"key":"13_CR23","unstructured":"Azerbayev, Z., et al.: Llemma: an open language model for mathematics. arXiv preprint arXiv:2310.10631 (2023)"},{"key":"13_CR24","unstructured":"Villalobos, P., Sevilla, J., Heim, L., Besiroglu, T., Hobbhahn, M., Ho, A.: Will we run out of data? an analysis of the limits of scaling datasets in machine learning. arXiv preprint arXiv:2211.04325 (2022)"},{"key":"13_CR25","doi-asserted-by":"publisher","first-page":"1821","DOI":"10.1109\/ACCESS.2016.2558446","volume":"4","author":"A Mehmood","year":"2016","unstructured":"Mehmood, A., Natgunanathan, I., Xiang, Y., Hua, G., Guo, S.: Protection of big data privacy. IEEE Access 4, 1821\u20131834 (2016)","journal-title":"IEEE Access"},{"key":"13_CR26","unstructured":"Li, X., Huang, K., Yang, W., Wang, S., Zhang, Z.: On the convergence of FedAvg on non-iid data. arXiv preprint arXiv:1907.02189 (2019)"},{"key":"13_CR27","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"13_CR28","unstructured":"Fan, T., et al.: Fate-LLM: a industrial grade federated learning framework for large language models. arXiv preprint arXiv:2310.10049 (2023)"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Towards building the federatedGPT: federated instruction tuning. In: ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6915\u20136919. IEEE (2024)","DOI":"10.1109\/ICASSP48485.2024.10447454"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Ye, R., et al.: OpenFedLLM: training large language models on decentralized private data via federated learning. arXiv preprint arXiv:2402.06954 (2024)","DOI":"10.1145\/3637528.3671582"},{"key":"13_CR31","unstructured":"Taori, R., et al.: Stanford alpaca: an instruction-following llama model. (2023)"},{"key":"13_CR32","unstructured":"Hendrycks, D., et al.: Measuring massive multitask language understanding. arXiv preprint arXiv:2009.03300 (2020)"},{"key":"13_CR33","unstructured":"Zheng, L., et al.: Judging LLM-as-a-judge with MT-bench and chatbot arena. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"13_CR34","unstructured":"Cowen, T.: Introducing GPT-4o (2024)"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-1528-5_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T11:29:14Z","timestamp":1750937354000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-1528-5_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819615278","9789819615285"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-1528-5_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 February 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Macau","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ica3pp2024.scimeeting.cn\/en\/web\/index\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}