{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T04:24:44Z","timestamp":1751430284173,"version":"3.41.0"},"publisher-location":"Singapore","reference-count":38,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819666010","type":"print"},{"value":"9789819665990","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-6599-0_21","type":"book-chapter","created":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T22:26:43Z","timestamp":1751408803000},"page":"304-317","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Progtuning: Progressive Fine-Tuning Framework for\u00a0Transformer-Based Language Models"],"prefix":"10.1007","author":[{"given":"Xiaoshuang","family":"Ji","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhendong","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaojun","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zeyao","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,2]]},"reference":[{"key":"21_CR1","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization. arXiv preprint arXiv:1607.06450 (2016)"},{"key":"21_CR2","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., Mann, B., Ryder, N., Subbiah, M., Kaplan, J.D., Dhariwal, P., Neelakantan, A., Shyam, P., Sastry, G., Askell, A., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR3","unstructured":"Dai, A.M., Le, Q.V.: Semi-supervised sequence learning. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"21_CR4","unstructured":"Dettmers, T., Lewis, M., Belkada, Y., Zettlemoyer, L.: Gpt3. int8 (): 8-bit matrix multiplication for transformers at scale. In: Advances in Neural Information Processing Systems, vol. 35, pp. 30318\u201330332 (2022)"},{"key":"21_CR5","unstructured":"Dettmers, T., Pagnoni, A., Holtzman, A., Zettlemoyer, L.: Qlora: efficient finetuning of quantized llms. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"21_CR6","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"21_CR7","unstructured":"Edalati, A., Tahaei, M., Kobyzev, I., Nia, V.P., Clark, J.J., Rezagholizadeh, M.: Krona: parameter efficient tuning with kronecker adapter. arXiv preprint arXiv:2212.10650 (2022)"},{"key":"21_CR8","unstructured":"Frantar, E., Alistarh, D.: Sparsegpt: massive language models can be accurately pruned in one-shot. In: International Conference on Machine Learning, pp. 10323\u201310337. PMLR (2023)"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Guo, D., Rush, A.M., Kim, Y.: Parameter-efficient transfer learning with diff pruning. arXiv preprint arXiv:2012.07463 (2020)","DOI":"10.18653\/v1\/2021.acl-long.378"},{"key":"21_CR10","unstructured":"Han, Z., Gao, C., Liu, J., Zhang, S.Q., et\u00a0al.: Parameter-efficient fine-tuning for large models: a comprehensive survey. arXiv preprint arXiv:2403.14608 (2024)"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"21_CR12","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for nlp. In: International Conference on Machine Learning, pp. 2790\u20132799. PMLR (2019)"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Howard, J., Ruder, S.: Universal language model fine-tuning for text classification. arXiv preprint arXiv:1801.06146 (2018)","DOI":"10.18653\/v1\/P18-1031"},{"key":"21_CR14","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Jawahar, G., Sagot, B., Seddah, D.: What does bert learn about the structure of language? In: ACL 2019-57th Annual Meeting of the Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/P19-1356"},{"key":"21_CR16","unstructured":"Karras, T., Aila, T., Laine, S., Lehtinen, J.: Progressive growing of gans for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196 (2017)"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Li, X.L., Liang, P.: Prefix-tuning: optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"21_CR19","unstructured":"Li, Y., et al.: Loftq: lora-fine-tuning-aware quantization for large language models. arXiv preprint arXiv:2310.08659 (2023)"},{"key":"21_CR20","unstructured":"Lialin, V., Deshpande, V., Rumshisky, A.: Scaling down to scale up: a guide to parameter-efficient fine-tuning. arXiv preprint arXiv:2303.15647 (2023)"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Liu, S.Y., Liu, Z., Huang, X., Dong, P., Cheng, K.T.: Llm-fp4: 4-bit floating-point quantized transformers. arXiv preprint arXiv:2310.16836 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.39"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: P-tuning v2: prompt tuning can be comparable to fine-tuning universally across scales and tasks. arXiv preprint arXiv:2110.07602 (2021)","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: Gpt understands, too. AI Open (2023)","DOI":"10.1016\/j.aiopen.2023.08.012"},{"key":"21_CR24","unstructured":"Liu, Y., et al.: Roberta: a robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"21_CR25","doi-asserted-by":"crossref","unstructured":"Mao, Y., Huang, K., Guan, C., Bao, G., Mo, F., Xu, J.: Dora: enhancing parameter-efficient fine-tuning with dynamic rank distribution. arXiv preprint arXiv:2405.17357 (2024)","DOI":"10.18653\/v1\/2024.acl-long.626"},{"issue":"10","key":"21_CR26","doi-asserted-by":"publisher","first-page":"1872","DOI":"10.1007\/s11431-020-1647-3","volume":"63","author":"XP Qiu","year":"2020","unstructured":"Qiu, X.P., Sun, T.X., Xu, Y.G., Shao, Y.F., Dai, N., Huang, X.J.: Pre-trained models for natural language processing: a survey. Sci. China Technol. Sci. 63(10), 1872\u20131897 (2020). https:\/\/doi.org\/10.1007\/s11431-020-1647-3","journal-title":"Sci. China Technol. Sci."},{"issue":"8","key":"21_CR27","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"issue":"1","key":"21_CR28","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21(1), 5485\u20135551 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P., Zhang, J., Lopyrev, K., Liang, P.: Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250 (2016)","DOI":"10.18653\/v1\/D16-1264"},{"key":"21_CR30","unstructured":"Sun, M., Liu, Z., Bair, A., Kolter, J.Z.: A simple and effective pruning approach for large language models. arXiv preprint arXiv:2306.11695 (2023)"},{"key":"21_CR31","unstructured":"Touvron, H., et\u00a0al.: Llama: open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"21_CR32","doi-asserted-by":"crossref","unstructured":"Vucetic, D., Tayaranian, M., Ziaeefard, M., Clark, J.J., Meyer, B.H., Gross, W.J.: Efficient fine-tuning of bert models on the edge. In: 2022 IEEE International Symposium on Circuits and Systems (ISCAS), pp. 1838\u20131842. IEEE (2022)","DOI":"10.1109\/ISCAS48785.2022.9937567"},{"key":"21_CR33","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: Glue: a multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461 (2018)","DOI":"10.18653\/v1\/W18-5446"},{"key":"21_CR34","doi-asserted-by":"crossref","unstructured":"Wang, Y., Perazzi, F., McWilliams, B., Sorkine-Hornung, A., Sorkine-Hornung, O., Schroers, C.: A fully progressive approach to single-image super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 864\u2013873 (2018)","DOI":"10.1109\/CVPRW.2018.00131"},{"key":"21_CR35","unstructured":"Wolf, T., et\u00a0al.: Huggingface\u2019s transformers: state-of-the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019)"},{"key":"21_CR36","unstructured":"Wolf, T., et\u00a0al.: Transformers: state-of-the-art natural language processing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 38\u201345 (2020)"},{"key":"21_CR37","doi-asserted-by":"crossref","unstructured":"Wu, R., Zhang, G., Lu, S., Chen, T.: Cascade ef-gan: progressive facial expression editing with local focuses. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5021\u20135030 (2020)","DOI":"10.1109\/CVPR42600.2020.00507"},{"key":"21_CR38","unstructured":"Zaken, E.B., Ravfogel, S., Goldberg, Y.: Bitfit: simple parameter-efficient fine-tuning for transformer-based masked language-models. arXiv preprint arXiv:2106.10199 (2021)"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-6599-0_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T22:26:51Z","timestamp":1751408811000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-6599-0_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819666010","9789819665990"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-6599-0_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"2 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Auckland","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"New Zealand","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2024.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}