{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T17:28:42Z","timestamp":1743096522162,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":41,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819783663"},{"type":"electronic","value":"9789819783670"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8367-0_32","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T11:56:28Z","timestamp":1732794988000},"page":"540-554","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Pattern Shifting or\u00a0Knowledge Losing? A Forgetting Perspective for\u00a0Understanding the\u00a0Effect of\u00a0Instruction Fine-Tuning"],"prefix":"10.1007","author":[{"given":"Chunkang","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Boxi","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Yaojie","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Hongyu","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Liu","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Ke","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Guanglu","family":"Wan","sequence":"additional","affiliation":[]},{"given":"Xunliang","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Xianpei","family":"Han","sequence":"additional","affiliation":[]},{"given":"Le","family":"Sun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"32_CR1","doi-asserted-by":"publisher","unstructured":"Andersen, J.S., Maalej, W.: Efficient, uncertainty-based moderation of neural networks text classifiers. In: Findings of the Association for Computational Linguistics: ACL 2022, pp. 1536\u20131546. Association for Computational Linguistics, Dublin, Ireland (2022). https:\/\/doi.org\/10.18653\/v1\/2022.findings-acl.121","DOI":"10.18653\/v1\/2022.findings-acl.121"},{"key":"32_CR2","unstructured":"Bai, Y., et al.: Training a helpful and harmless assistant with reinforcement learning from human feedback (2022)"},{"key":"32_CR3","unstructured":"Burns, C., et al.: Weak-to-strong generalization: eliciting strong capabilities with weak supervision (2023)"},{"key":"32_CR4","doi-asserted-by":"crossref","unstructured":"Cha, H., Lee, J., Shin, J.: Co2l: contrastive continual learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9516\u20139525 (2021)","DOI":"10.1109\/ICCV48922.2021.00938"},{"key":"32_CR5","unstructured":"Cha, J., et al.: Swad: domain generalization by seeking flat minima (2021)"},{"key":"32_CR6","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding (2019)"},{"issue":"3","key":"32_CR7","first-page":"265","volume":"11","author":"G Fischer","year":"2000","unstructured":"Fischer, G.: Lifelong learning-more than training. J. Interact. Learn. Res. 11(3), 265\u2013294 (2000)","journal-title":"J. Interact. Learn. Res."},{"issue":"4","key":"32_CR8","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/S1364-6613(99)01294-2","volume":"3","author":"RM French","year":"1999","unstructured":"French, R.M.: Catastrophic forgetting in connectionist networks. Trends Cogn. Sci. 3(4), 128\u2013135 (1999)","journal-title":"Trends Cogn. Sci."},{"key":"32_CR9","doi-asserted-by":"crossref","unstructured":"Goyal, S., Kumar, A., Garg, S., Kolter, Z., Raghunathan, A.: Finetune like you pretrain: improved finetuning of zero-shot vision models (2022)","DOI":"10.1109\/CVPR52729.2023.01853"},{"key":"32_CR10","doi-asserted-by":"crossref","unstructured":"Gupta, P., Jiao, C., Yeh, Y.T., Mehri, S., Eskenazi, M., Bigham, J.P.: Instructdial: improving zero and few-shot generalization in dialogue through instruction tuning (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.33"},{"key":"32_CR11","unstructured":"Hendrycks, D., et al.: Measuring massive multitask language understanding (2021)"},{"key":"32_CR12","unstructured":"Jain, S., et al.: Mechanistically analyzing the effects of fine-tuning on procedurally defined tasks (2023)"},{"key":"32_CR13","doi-asserted-by":"publisher","unstructured":"Kirkpatrick, J., et al.: Overcoming catastrophic forgetting in neural networks. Proc. Natl. Acad. Sci. 114(13), 3521\u20133526 (2017). https:\/\/doi.org\/10.1073\/pnas.1611835114","DOI":"10.1073\/pnas.1611835114"},{"key":"32_CR14","unstructured":"Kotha, S., Springer, J.M., Raghunathan, A.: Understanding Catastrophic Forgetting in Language Models via Implicit Inference (2023). http:\/\/arxiv.org\/abs\/2309.10105, arXiv:2309.10105 [cs]"},{"key":"32_CR15","unstructured":"Kumar, A., Raghunathan, A., Jones, R., Ma, T., Liang, P.: Fine-tuning can distort pretrained features and underperform out-of-distribution (2022)"},{"key":"32_CR16","unstructured":"Kung, P.N., Peng, N.: Do Models Really Learn to Follow Instructions? An Empirical Study of Instruction Tuning (2023). http:\/\/arxiv.org\/abs\/2305.11383, arXiv:2305.11383 [cs]"},{"key":"32_CR17","unstructured":"Lin, Y., et al.: Speciality vs Generality: An Empirical Study on Catastrophic Forgetting in Fine-tuning Foundation Models (2023). http:\/\/arxiv.org\/abs\/2309.06256, arXiv:2309.06256 [cs]"},{"key":"32_CR18","unstructured":"Luo, Y., Yang, Z., Meng, F., Li, Y., Zhou, J., Zhang, Y.: An empirical study of catastrophic forgetting in large language models during continual fine-tuning. arXiv preprint arXiv:2308.08747 (2023)"},{"key":"32_CR19","doi-asserted-by":"crossref","unstructured":"McClelland, J.L., McNaughton, B.L., O\u2019Reilly, R.C.: Why there are complementary learning systems in the hippocampus and neocortex: insights from the successes and failures of connectionist models of learning and memory. Psychol. Rev. 102(3), 419 (1995)","DOI":"10.1037\/\/0033-295X.102.3.419"},{"key":"32_CR20","doi-asserted-by":"crossref","unstructured":"McCloskey, M., Cohen, N.J.: Catastrophic interference in connectionist networks: The sequential learning problem. In: Psychology of learning and motivation, vol.\u00a024, pp. 109\u2013165. Elsevier (1989)","DOI":"10.1016\/S0079-7421(08)60536-8"},{"key":"32_CR21","unstructured":"Min, T.J.C.: An approach to solving the abstraction and reasoning corpus (arc) challenge (2023)"},{"key":"32_CR22","unstructured":"OpenAI Achiam, J., et al.: Gpt-4 technical report (2023)"},{"key":"32_CR23","unstructured":"Ouyang, L., et al.: Training language models to follow instructions with human feedback (2022)"},{"key":"32_CR24","unstructured":"Peng, B., Risteski, A.: Continual learning: a feature extraction formalization, an efficient algorithm, and fundamental obstructions. Adv. Neural. Inf. Process. Syst. 35, 28414\u201328427 (2022)"},{"key":"32_CR25","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision (2021)"},{"key":"32_CR26","doi-asserted-by":"crossref","unstructured":"Rebuffi, S.A., Kolesnikov, A., Sperl, G., Lampert, C.H.: ICARL: incremental classifier and representation learning (2017)","DOI":"10.1109\/CVPR.2017.587"},{"key":"32_CR27","doi-asserted-by":"publisher","unstructured":"Scialom, T., Chakrabarty, T., Muresan, S.: Fine-tuned language models are continual learners. In: Goldberg, Y., Kozareva, Z., Zhang, Y. (eds.) Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pp. 6107\u20136122. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates (2022). https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.410, https:\/\/aclanthology.org\/2022.emnlp-main.410","DOI":"10.18653\/v1\/2022.emnlp-main.410"},{"key":"32_CR28","unstructured":"Silver, D.L., Yang, Q., Li, L.: Lifelong machine learning systems: beyond learning algorithms. In: 2013 AAAI spring symposium series (2013)"},{"key":"32_CR29","unstructured":"Taori, R., et al.: Stanford alpaca: an instruction-following llama model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca (2023)"},{"key":"32_CR30","unstructured":"Tirumala, K., Markosyan, A.H., Zettlemoyer, L., Aghajanyan, A.: Memorization Without Overfitting: Analyzing the Training Dynamics of Large Language Models (2022). http:\/\/arxiv.org\/abs\/2205.10770, arXiv:2205.10770 [cs]"},{"key":"32_CR31","unstructured":"Touvron, H., et al.: LLaMA: Open and Efficient Foundation Language Models (2023). http:\/\/arxiv.org\/abs\/2302.13971, arXiv:2302.13971 [cs]"},{"key":"32_CR32","unstructured":"Touvron, H., et al.: Llama 2: Open Foundation and Fine-Tuned Chat Models (2023). http:\/\/arxiv.org\/abs\/2307.09288, arXiv:2307.09288 [cs]"},{"key":"32_CR33","unstructured":"Wang, H., Liu, C., Xi, N., Qiang, Z., Zhao, S., Qin, B., Liu, T.: Huatuo: tuning llama model with Chinese medical knowledge (2023)"},{"key":"32_CR34","doi-asserted-by":"publisher","unstructured":"Wu, H., Tan, H., Xu, K., Liu, S., Wu, L., Song, L.: Zero-shot cross-lingual conversational semantic role labeling. In: Findings of the Association for Computational Linguistics: NAACL 2022, pp. 269\u2013281. Association for Computational Linguistics, Seattle, United States (2022).https:\/\/doi.org\/10.18653\/v1\/2022.findings-naacl.20","DOI":"10.18653\/v1\/2022.findings-naacl.20"},{"key":"32_CR35","doi-asserted-by":"crossref","unstructured":"Xia, M., et al.: Training trajectories of language models across scales (2023)","DOI":"10.18653\/v1\/2023.acl-long.767"},{"issue":"4","key":"32_CR36","doi-asserted-by":"publisher","first-page":"421","DOI":"10.4103\/0970-9185.194772","volume":"32","author":"LN Yaddanapudi","year":"2016","unstructured":"Yaddanapudi, L.N.: The American statistical association statement on p-values explained. J. Anaesthesiol. Clin. Pharmacol. 32(4), 421 (2016)","journal-title":"J. Anaesthesiol. Clin. Pharmacol."},{"key":"32_CR37","doi-asserted-by":"crossref","unstructured":"Yin, F., Vig, J., Laban, P., Joty, S., Xiong, C., Wu, C.S.J.: Did you read the instructions? Rethinking the effectiveness of task definitions in instruction learning (2023)","DOI":"10.18653\/v1\/2023.acl-long.172"},{"key":"32_CR38","unstructured":"Zeng, S., et al.: Exploring memorization in fine-tuned language models (2023)"},{"key":"32_CR39","unstructured":"Zhang, M., R\u00e9, C.: Contrastive adapters for foundation model group robustness (2022)"},{"key":"32_CR40","doi-asserted-by":"crossref","unstructured":"Zhao, Y., et al.: Pytorch fsdp: experiences on scaling fully sharded data parallel (2023)","DOI":"10.14778\/3611540.3611569"},{"key":"32_CR41","unstructured":"Zheng, L., et al.: Judging llm-as-a-judge with mt-bench and chatbot arena (2023)"}],"container-title":["Lecture Notes in Computer Science","Chinese Computational Linguistics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8367-0_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T12:09:20Z","timestamp":1732795760000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8367-0_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9789819783663","9789819783670"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8367-0_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CCL","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China National Conference on Chinese Computational Linguistics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiyuan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 July 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 July 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cncl2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/cips-cl.org\/static\/CCL2024\/en\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}