{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T20:10:03Z","timestamp":1782331803124,"version":"3.54.5"},"reference-count":272,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T00:00:00Z","timestamp":1746230400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T00:00:00Z","timestamp":1746230400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2023YFE0108600"],"award-info":[{"award-number":["2023YFE0108600"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U22A6001"],"award-info":[{"award-number":["U22A6001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Artificial Intelligence Laboratory","award":["P22KN00581"],"award-info":[{"award-number":["P22KN00581"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"DOI":"10.1007\/s10462-025-11236-4","type":"journal-article","created":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T23:55:20Z","timestamp":1746230120000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":108,"title":["Parameter-efficient fine-tuning in large language models: a survey of methodologies"],"prefix":"10.1007","volume":"58","author":[{"given":"Luping","family":"Wang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sheng","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Linnan","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shu","family":"Pan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Runze","family":"Cai","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sen","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fei","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,5,3]]},"reference":[{"key":"11236_CR1","doi-asserted-by":"crossref","unstructured":"Abadi M, Chu A, Goodfellow I et\u00a0al (2016) Deep learning with differential privacy. In: Proceedings of the 2016 ACM SIGSAC conference on computer and communications security, pp 308\u2013318","DOI":"10.1145\/2976749.2978318"},{"key":"11236_CR2","unstructured":"Achiam J, Adler S, Agarwal S et\u00a0al (2023) GPT-4 technical report. arXiv preprint. arXiv:2303.08774"},{"key":"11236_CR3","doi-asserted-by":"crossref","unstructured":"Aghajanyan A, Zettlemoyer L, Gupta S (2020) Intrinsic dimensionality explains the effectiveness of language model fine-tuning. arXiv preprint. arXiv:2012.13255","DOI":"10.18653\/v1\/2021.acl-long.568"},{"key":"11236_CR4","doi-asserted-by":"crossref","unstructured":"Aghajanyan A, Gupta A, Shrivastava A et\u00a0al (2021) MUPPET: massive multi-task representations with pre-finetuning. arXiv preprint. arXiv:2101.11038","DOI":"10.18653\/v1\/2021.emnlp-main.468"},{"key":"11236_CR5","doi-asserted-by":"crossref","unstructured":"Agiza A, Neseem M, Reda S (2024) MTLORA: a low-rank adaptation approach for efficient multi-task learning. arXiv preprint. arXiv:2403.20320","DOI":"10.1109\/CVPR52733.2024.01533"},{"key":"11236_CR6","unstructured":"Ahn J, Verma R, Lou R et\u00a0al (2024) Large language models for mathematical reasoning: progresses and challenges. arXiv preprint. arXiv:2402.00157"},{"key":"11236_CR7","first-page":"23716","volume":"35","author":"JB Alayrac","year":"2022","unstructured":"Alayrac JB, Donahue J, Luc P et al (2022) FLAMINGO: a visual language model for few-shot learning. Adv Neural Inf Process Syst 35:23716\u201323736","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR8","unstructured":"Anil R, Borgeaud S, Wu Y et\u00a0al (2023) Gemini: A family of highly capable multimodal models. arXiv preprint. arXiv:2312.11805 1"},{"key":"11236_CR9","doi-asserted-by":"crossref","unstructured":"Ansell A, Ponti EM, Korhonen A et\u00a0al (2021) Composable sparse fine-tuning for cross-lingual transfer. arXiv preprint. arXiv:2110.07560","DOI":"10.18653\/v1\/2022.acl-long.125"},{"key":"11236_CR10","unstructured":"Anthropic (Online) Claude. https:\/\/www.anthropic.com\/claude. Accessed 11 Feb 2025"},{"key":"11236_CR11","unstructured":"Aribandi V, Tay Y, Schuster T et\u00a0al (2021) EXT5: towards extreme multi-task scaling for transfer learning. arXiv preprint. arXiv:2111.10952"},{"key":"11236_CR12","doi-asserted-by":"crossref","unstructured":"Asai A, Salehi M, Peters ME et\u00a0al (2022) Attempt: parameter-efficient multi-task tuning via attentional mixtures of soft prompts. In: Proceedings of the 2022 conference on empirical methods in natural language processing, pp 6655\u20136672","DOI":"10.18653\/v1\/2022.emnlp-main.446"},{"key":"11236_CR13","unstructured":"Austin J, Odena A, Nye M et\u00a0al (2021) Program synthesis with large language models. arXiv preprint. arXiv:2108.07732"},{"key":"11236_CR14","doi-asserted-by":"crossref","unstructured":"Bach SH, Sanh V, Yong ZX et\u00a0al (2022) Promptsource: an integrated development environment and repository for natural language prompts. arXiv preprint. arXiv:2202.01279","DOI":"10.18653\/v1\/2022.acl-demo.9"},{"key":"11236_CR15","unstructured":"Bahng H, Jahanian A, Sankaranarayanan S et\u00a0al (2022) Exploring visual prompts for adapting large-scale models. arXiv preprint. arXiv:2203.17274"},{"key":"11236_CR16","unstructured":"Bai Y, Jones A, Ndousse K et\u00a0al (2022a) Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint. arXiv:2204.05862"},{"key":"11236_CR17","unstructured":"Bai Y, Kadavath S, Kundu S et\u00a0al (2022b) Constitutional ai: Harmlessness from ai feedback. arXiv preprint. arXiv:2212.08073"},{"key":"11236_CR18","doi-asserted-by":"crossref","unstructured":"Baumgartner J, Zannettou S, Keegan B et\u00a0al (2020) The pushshift reddit dataset. In: ICWSM. AAAI Press, pp 830\u2013839","DOI":"10.1609\/icwsm.v14i1.7347"},{"key":"11236_CR19","doi-asserted-by":"crossref","unstructured":"Bender EM, Gebru T, McMillan-Major A et\u00a0al (2021) On the dangers of stochastic parrots: Can language models be too big? In: Proceedings of the 2021 ACM conference on fairness, accountability, and transparency, pp 610\u2013623","DOI":"10.1145\/3442188.3445922"},{"key":"11236_CR20","unstructured":"Bi X, Chen D, Chen G et\u00a0al (2024) Deepseek llm: Scaling open-source language models with longtermism. arXiv preprint. arXiv:2401.02954"},{"issue":"5","key":"11236_CR21","doi-asserted-by":"publisher","first-page":"868","DOI":"10.1007\/s10439-023-03172-7","volume":"51","author":"SS Biswas","year":"2023","unstructured":"Biswas SS (2023) Role of Chat GPT in public health. Ann Biomed Eng 51(5):868\u2013869","journal-title":"Ann Biomed Eng"},{"key":"11236_CR22","unstructured":"Bommasani R, Hudson DA, Adeli E et\u00a0al (2021) On the opportunities and risks of foundation models. arXiv preprint. arXiv:2108.07258"},{"key":"11236_CR23","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N et al (2020) Language models are few-shot learners. Adv Neural Inf Process Syst 33:1877\u20131901","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR24","doi-asserted-by":"crossref","unstructured":"Cao J, Prakash CS, Hamza W (2022) Attention fusion: a light yet efficient late fusion mechanism for task adaptation in NLU. In: Findings of the Association for Computational Linguistics: NAACL 2022, pp 857\u2013866","DOI":"10.18653\/v1\/2022.findings-naacl.64"},{"key":"11236_CR29","doi-asserted-by":"crossref","unstructured":"Chen S, Hou Y, Cui Y et\u00a0al (2020) Recall and learn: fine-tuning deep pretrained language models with less forgetting. arXiv preprint. arXiv:2004.12651","DOI":"10.18653\/v1\/2020.emnlp-main.634"},{"key":"11236_CR28","unstructured":"Chen M, Tworek J, Jun H et\u00a0al (2021) Evaluating large language models trained on code. arXiv preprint. arXiv:2107.03374"},{"key":"11236_CR30","unstructured":"Chen S, Ge C, Tong Z et al (2022a) ADAPTFORMER: adapting vision transformers for scalable visual recognition. Adv Neural Inf Process Syst 35:16664\u201316678"},{"key":"11236_CR33","doi-asserted-by":"crossref","unstructured":"Chen Y, Hazarika D, Namazifar M et al (2022b) Empowering parameter-efficient transfer learning by recognizing the kernel structure in self-attention. In: Findings of the Association for Computational Linguistics: NAACL 2022, pp 1375\u20131388","DOI":"10.18653\/v1\/2022.findings-naacl.102"},{"key":"11236_CR35","unstructured":"Chen Z, Duan Y, Wang W et\u00a0al (2022c) Vision transformer adapter for dense predictions. arXiv preprint. arXiv:2205.08534"},{"key":"11236_CR25","doi-asserted-by":"crossref","unstructured":"Chen A, Yao Y, Chen PY et\u00a0al (2023a) Understanding and improving visual prompting: a label-mapping perspective. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 19133\u201319143","DOI":"10.1109\/CVPR52729.2023.01834"},{"key":"11236_CR26","unstructured":"Chen J, Zhang A, Shi X et\u00a0al (2023b) Parameter-efficient fine-tuning design spaces. arXiv preprint. arXiv:2301.01821"},{"key":"11236_CR27","doi-asserted-by":"crossref","unstructured":"Chen L, Huang H, Cheng M (2023c) PTP: boosting stability and performance of prompt tuning with perturbation-based regularizer. arXiv preprint. arXiv:2305.02423","DOI":"10.18653\/v1\/2023.emnlp-main.833"},{"key":"11236_CR31","doi-asserted-by":"crossref","unstructured":"Chen W, Yin M, Ku M et\u00a0al (2023d) THEOREMQA: a theorem-driven question answering dataset. In: EMNLP. Association for Computational Linguistics, pp 7889\u20137901","DOI":"10.18653\/v1\/2023.emnlp-main.489"},{"key":"11236_CR34","doi-asserted-by":"crossref","unstructured":"Chen Y, Fu Q, Fan G et\u00a0al (2023e) Hadamard adapter: an extreme parameter-efficient adapter tuning method for pre-trained language models. In: Proceedings of the 32nd ACM international conference on information and knowledge management, pp 276\u2013285","DOI":"10.1145\/3583780.3614904"},{"key":"11236_CR32","doi-asserted-by":"crossref","unstructured":"Chen X, Liu J, Wang Y et\u00a0al (2024) SUPERLORA: parameter-efficient unified adaptation of multi-layer attention modules. arXiv preprint. arXiv:2403.11887","DOI":"10.1109\/CVPRW63382.2024.00804"},{"key":"11236_CR36","doi-asserted-by":"crossref","unstructured":"Cherti M, Beaumont R, Wightman R et\u00a0al (2023) Reproducible scaling laws for contrastive language-image learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2818\u20132829","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"11236_CR37","unstructured":"Cho J, Lei J, Tan H et\u00a0al (2021) Unifying vision-and-language tasks via text generation. In: International conference on machine learning, PMLR, pp 1931\u20131942"},{"key":"11236_CR38","doi-asserted-by":"crossref","unstructured":"Choi JY, Kim J, Park JH et\u00a0al (2023) SMOP: towards efficient and effective prompt tuning with sparse mixture-of-prompts. In: The 2023 conference on empirical methods in natural language processing","DOI":"10.18653\/v1\/2023.emnlp-main.884"},{"issue":"240","key":"11236_CR39","first-page":"1","volume":"24","author":"A Chowdhery","year":"2023","unstructured":"Chowdhery A, Narang S, Devlin J et al (2023) PALM: scaling language modeling with pathways. J Mach Learn Res 24(240):1\u2013113","journal-title":"J Mach Learn Res"},{"key":"11236_CR40","unstructured":"Christiano PF, Leike J, Brown T et\u00a0al (2017) Deep reinforcement learning from human preferences. In: Advances in neural information processing systems, vol 30"},{"key":"11236_CR41","doi-asserted-by":"crossref","unstructured":"Chronopoulou A, Peters ME, Fraser A et\u00a0al (2023) Adaptersoup: Weight averaging to improve generalization of pretrained language models. arXiv preprint. arXiv:2302.07027","DOI":"10.18653\/v1\/2023.findings-eacl.153"},{"key":"11236_CR42","unstructured":"Chung HW, Hou L, Longpre S et\u00a0al (2022) Scaling instruction-finetuned language models. arXiv preprint. arXiv:2210.11416"},{"issue":"70","key":"11236_CR43","first-page":"1","volume":"25","author":"HW Chung","year":"2024","unstructured":"Chung HW, Hou L, Longpre S et al (2024) Scaling instruction-finetuned language models. J Mach Learn Res 25(70):1\u201353","journal-title":"J Mach Learn Res"},{"key":"11236_CR44","unstructured":"Clark P, Cowhey I, Etzioni O et\u00a0al (2018) Think you have solved question answering? Try arc, the AI2 reasoning challenge. arXiv preprint. arXiv:1803.05457v1"},{"key":"11236_CR45","unstructured":"Cobbe K, Kosaraju V, Bavarian M et\u00a0al (2021) Training verifiers to solve math word problems. arXiv preprint. arXiv:2110.14168"},{"key":"11236_CR46","doi-asserted-by":"crossref","unstructured":"Dai D, Deng C, Zhao C et\u00a0al (2024a) DeepSeekMoE: towards ultimate expert specialization in mixture-of-experts language models. arXiv preprint. arXiv:2401.06066","DOI":"10.18653\/v1\/2024.acl-long.70"},{"key":"11236_CR47","unstructured":"Dai W, Li J, Li D et\u00a0al (2024b) InstructBLIP: towards general-purpose vision-language models with instruction tuning. In: Advances in Neural Information Processing Systems, vol 36"},{"key":"11236_CR48","unstructured":"Dan Y, Lei Z, Gu Y et\u00a0al (2023) Educhat: a large-scale language model-based chatbot system for intelligent education. arXiv preprint. arXiv:2308.02773"},{"key":"11236_CR49","doi-asserted-by":"crossref","unstructured":"Das SSS, Zhang RH, Shi P et\u00a0al (2023) Unified low-resource sequence labeling by sample-aware dynamic sparse finetuning. arXiv preprint. arXiv:2311.03748","DOI":"10.18653\/v1\/2023.emnlp-main.433"},{"key":"11236_CR50","unstructured":"Dettmers T, Pagnoni A, Holtzman A et\u00a0al (2024) QLORA: efficient finetuning of quantized LLMS. In: Advances in neural information processing systems. vol 36"},{"key":"11236_CR51","unstructured":"Devlin J, Chang MW, Lee K et\u00a0al (2018) BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint. arXiv:1810.04805"},{"key":"11236_CR52","doi-asserted-by":"crossref","unstructured":"Ding N, Qin Y, Yang G et\u00a0al (2022) Delta tuning: a comprehensive study of parameter efficient methods for pre-trained language models. arXiv preprint. arXiv:2203.06904","DOI":"10.21203\/rs.3.rs-1553541\/v1"},{"key":"11236_CR53","doi-asserted-by":"crossref","unstructured":"Ding N, Lv X, Wang Q et\u00a0al (2023) Sparse low-rank adaptation of pre-trained language models. In: Proceedings of the 2023 conference on empirical methods in natural language processing, pp 4133\u20134145","DOI":"10.18653\/v1\/2023.emnlp-main.252"},{"key":"11236_CR54","unstructured":"Dong Z, Wei P, Lin L (2022) DreamArtist: towards controllable one-shot text-to-image generation via positive-negative prompt-tuning. arXiv preprint. arXiv:2211.11337"},{"key":"11236_CR55","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1016\/j.csl.2019.06.009","volume":"59","author":"O Du\u0161ek","year":"2020","unstructured":"Du\u0161ek O, Novikova J, Rieser V (2020) Evaluating the state-of-the-art of end-to-end natural language generation: the E2E NLG challenge. Comput Speech Lang 59:123\u2013156","journal-title":"Comput Speech Lang"},{"key":"11236_CR56","unstructured":"Edalati A, Tahaei M, Kobyzev I et\u00a0al (2022) Krona: parameter efficient tuning with kronecker adapter. arXiv preprint. arXiv:2212.10650"},{"key":"11236_CR57","unstructured":"Eisele A, Chen Y (2010) Multiun: a multilingual corpus from united nation documents. In: LREC"},{"key":"11236_CR58","unstructured":"Ethayarajh K, Choi Y, Swayamdipta S (2022) Understanding dataset difficulty with $$\\cal{V}$$-usable information. In: Chaudhuri K, Jegelka S, Song L et\u00a0al (eds) Proceedings of the 39th international conference on machine learning, proceedings of machine learning research, vol 162. PMLR, pp 5988\u20136008"},{"key":"11236_CR59","unstructured":"Fan J, Wang Z, Xie Y et\u00a0al (2020) A theoretical analysis of deep Q-learning. In: Learning for dynamics and control, PMLR, pp 486\u2013489"},{"key":"11236_CR61","doi-asserted-by":"crossref","unstructured":"Fu Z, Yang H, So AMC et\u00a0al (2023) On the effectiveness of parameter-efficient fine-tuning. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 12799\u201312807","DOI":"10.1609\/aaai.v37i11.26505"},{"key":"11236_CR60","doi-asserted-by":"crossref","unstructured":"Fu M, Zhu K, Wu J (2024) DTL: disentangled transfer learning for visual recognition. In: Proceedings of the AAAI conference on artificial intelligence, pp 12082\u201312090","DOI":"10.1609\/aaai.v38i11.29096"},{"key":"11236_CR191","unstructured":"G Team, Anil R, Borgeaud S et\u00a0al (2023) Gemini: a family of highly capable multimodal models. arXiv preprint. arXiv:2312.11805"},{"key":"11236_CR62","unstructured":"Gal R, Alaluf Y, Atzmon Y et\u00a0al (2022) An image is worth one word: personalizing text-to-image generation using textual inversion. arXiv preprint. arXiv:2208.01618"},{"key":"11236_CR63","unstructured":"Gao L, Biderman S, Black S et\u00a0al (2020) The Pile: ann 800gb dataset of diverse text for language modeling. arXiv preprint. arXiv:2101.00027"},{"key":"11236_CR64","doi-asserted-by":"crossref","unstructured":"Gardent C, Shimorina A, Narayan S et\u00a0al (2017) Creating training corpora for NLG micro-planning. In: 55th Annual meeting of the association for computational linguistics, ACL 2017. Association for Computational Linguistics (ACL), pp 179\u2013188","DOI":"10.18653\/v1\/P17-1017"},{"key":"11236_CR65","doi-asserted-by":"crossref","unstructured":"Gheini M, Ren X, May J (2021) Cross-attention is all you need: adapting pretrained transformers for machine translation. arXiv preprint. arXiv:2104.08771","DOI":"10.18653\/v1\/2021.emnlp-main.132"},{"key":"11236_CR66","unstructured":"Glaese A, McAleese N, Trbacz M et\u00a0al (2022) Improving alignment of dialogue agents via targeted human judgements. arXiv preprint. arXiv:2209.14375"},{"key":"11236_CR67","doi-asserted-by":"crossref","unstructured":"Gliwa B, Mochol I, Biesek M et\u00a0al (2019) Samsum corpus: a human-annotated dialogue dataset for abstractive summarization. arXiv preprint. arXiv:1911.12237","DOI":"10.18653\/v1\/D19-5409"},{"key":"11236_CR68","unstructured":"Gokaslan A, Cohen V (2019) Openwebtext corpus. http:\/\/Skylion007.github.io\/OpenWebTextCorpus"},{"key":"11236_CR69","doi-asserted-by":"crossref","unstructured":"Guo D, Rush AM, Kim Y (2020) Parameter-efficient transfer learning with diff pruning. arXiv preprint. arXiv:2012.07463","DOI":"10.18653\/v1\/2021.acl-long.378"},{"key":"11236_CR70","unstructured":"Guo D, Yang D, Zhang H et\u00a0al (2025) Deepseek-r1: incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint. arXiv:2501.12948"},{"key":"11236_CR71","unstructured":"Guo H, Greengard P, Xing EP et\u00a0al (2023) LQ-LORA: low-rank plus quantized matrix decomposition for efficient language model finetuning. arXiv preprint. arXiv:2311.12023"},{"key":"11236_CR72","doi-asserted-by":"crossref","unstructured":"Gupta P, Jiao C, Yeh YT et\u00a0al (2022) Instructdial: Improving zero and few-shot generalization in dialogue through instruction tuning. arXiv preprint. arXiv:2205.12673","DOI":"10.18653\/v1\/2022.emnlp-main.33"},{"key":"11236_CR73","doi-asserted-by":"crossref","unstructured":"Han L, Li Y, Zhang H et\u00a0al (2023) SVDIFF: compact parameter space for diffusion fine-tuning. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 7323\u20137334","DOI":"10.1109\/ICCV51070.2023.00673"},{"key":"11236_CR74","unstructured":"Han Z, Gao C, Liu J et\u00a0al (2024) Parameter-efficient fine-tuning for large models: A comprehensive survey. arXiv preprint. arXiv:2403.14608"},{"key":"11236_CR75","unstructured":"Hayou S, Ghosh N, Yu B (2024) Lora+: efficient low rank adaptation of large models. arXiv preprint. arXiv:2402.12354"},{"key":"11236_CR77","unstructured":"He J, Zhou C, Ma X et\u00a0al (2021) Towards a unified view of parameter-efficient transfer learning. arXiv preprint. arXiv:2110.04366"},{"key":"11236_CR78","doi-asserted-by":"crossref","unstructured":"He S, Ding L, Dong D et\u00a0al (2022) Sparseadapter: an easy approach for improving the parameter-efficiency of adapters. arXiv preprint. arXiv:2210.04284","DOI":"10.18653\/v1\/2022.findings-emnlp.160"},{"key":"11236_CR76","doi-asserted-by":"crossref","unstructured":"He H, Cai J, Zhang J et\u00a0al (2023a) Sensitivity-aware visual parameter-efficient fine-tuning. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 11825\u201311835","DOI":"10.1109\/ICCV51070.2023.01086"},{"key":"11236_CR79","unstructured":"He S, Fan RZ, Ding L et\u00a0al (2023b) MERA: merging pretrained adapters for few-shot learning. arXiv preprint. arXiv:2308.15982"},{"key":"11236_CR80","unstructured":"Hendrycks D, Burns C, Basart S et\u00a0al (2021a) Measuring massive multitask language understanding. In: ICLR. OpenReview.net"},{"key":"11236_CR81","unstructured":"Hendrycks D, Burns C, Kadavath S et\u00a0al (2021b) Measuring mathematical problem solving with the math dataset. In: Thirty-fifth conference on neural information processing systems datasets and benchmarks track (Round 2), pp 1\u201311"},{"key":"11236_CR82","unstructured":"Hoffmann J, Borgeaud S, Mensch A et\u00a0al (2022) Training compute-optimal large language models. arXiv preprint. arXiv:2203.15556"},{"key":"11236_CR83","doi-asserted-by":"crossref","unstructured":"Honovich O, Scialom T, Levy O et\u00a0al (2022) Unnatural instructions: tuning language models with (almost) no human labor. arXiv preprint. arXiv:2212.09689","DOI":"10.18653\/v1\/2023.acl-long.806"},{"key":"11236_CR84","unstructured":"Houlsby N, Giurgiu A, Jastrzebski S et\u00a0al (2019) Parameter-efficient transfer learning for nlp. In: International conference on machine learning, PMLR, pp 2790\u20132799"},{"key":"11236_CR85","unstructured":"Hu EJ, Shen Y, Wallis P et\u00a0al (2021) LORA: low-rank adaptation of large language models. arXiv preprint. arXiv:2106.09685"},{"key":"11236_CR86","first-page":"9853","volume":"35","author":"S Hu","year":"2022","unstructured":"Hu S, Zhang Z, Ding N et al (2022) Sparse structure search for delta tuning. Adv Neural Inf Process Syst 35:9853\u20139865","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR87","doi-asserted-by":"crossref","unstructured":"Hu Z, Wang L, Lan Y et\u00a0al (2023) LLM-ADAPTERS: an adapter family for parameter-efficient fine-tuning of large language models. arXiv preprint. arXiv:2304.01933","DOI":"10.18653\/v1\/2023.emnlp-main.319"},{"key":"11236_CR89","doi-asserted-by":"crossref","unstructured":"Huang J, Chang KCC (2022) Towards reasoning in large language models: A survey. arXiv preprint. arXiv:2212.10403","DOI":"10.18653\/v1\/2023.findings-acl.67"},{"key":"11236_CR88","unstructured":"Huang C, Liu Q, Lin BY et\u00a0al (2023a) LORAHUB: efficient cross-task generalization via dynamic lora composition. arXiv preprint. arXiv:2307.13269"},{"key":"11236_CR92","doi-asserted-by":"crossref","unstructured":"Huang Q, Dong X, Chen D et\u00a0al (2023b) Diversity-aware meta visual prompting. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10878\u201310887","DOI":"10.1109\/CVPR52729.2023.01047"},{"key":"11236_CR90","unstructured":"Huang K, Mo F, Li H et\u00a0al (2024a) A survey on large language models with multilingualism: Recent advances and new frontiers. arXiv preprint. arXiv:2405.10936"},{"key":"11236_CR91","unstructured":"Huang K, Qu Y, Cousins H et\u00a0al (2024b) CRISPR-GPT: an llm agent for automated design of gene-editing experiments. arXiv preprint. arXiv:2404.18021"},{"key":"11236_CR93","unstructured":"Iyer S, Lin XV, Pasunuru R et\u00a0al (2022) OPT-IML: scaling language model instruction meta learning through the lens of generalization. arXiv preprint. arXiv:2212.12017"},{"key":"11236_CR94","unstructured":"Jaech A, Kalai A, Lerer A et\u00a0al (2024) OPENAI O1 system card. arXiv preprint. arXiv:2412.16720"},{"key":"11236_CR95","first-page":"24678","volume":"36","author":"J Ji","year":"2023","unstructured":"Ji J, Liu M, Dai J et al (2023) BeaverTails: towards improved safety alignment of LLM via a human-preference dataset. Adv Neural Inf Process Syst 36:24678\u201324704","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR96","doi-asserted-by":"crossref","unstructured":"Jia M, Tang L, Chen BC et\u00a0al (2022) Visual prompt tuning. In: European conference on computer vision. Springer, pp 709\u2013727","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"11236_CR98","first-page":"962","volume":"9","author":"Z Jiang","year":"2021","unstructured":"Jiang Z, Araki J, Ding H et al (2021) How can we know when language models know? On the calibration of language models for question answering. Trans Assoc Comput Ling 9:962\u2013977","journal-title":"Trans Assoc Comput Ling"},{"key":"11236_CR97","unstructured":"Jiang AQ, Sablayrolles A, Mensch A et\u00a0al (2023) MISTRAL 7B. arXiv preprint. arXiv:2310.06825"},{"key":"11236_CR99","unstructured":"Jie S, Deng ZH (2022) Convolutional bypasses are better vision transformer adapters. arXiv preprint. arXiv:2207.07039"},{"key":"11236_CR100","doi-asserted-by":"crossref","unstructured":"Jie S, Deng ZH (2023) Fact: Factor-tuning for lightweight adaptation on vision transformer. In: Proceedings of the AAAI conference on artificial intelligence, pp 1060\u20131068","DOI":"10.1609\/aaai.v37i1.25187"},{"key":"11236_CR101","doi-asserted-by":"crossref","unstructured":"Jie S, Wang H, Deng ZH (2023) Revisiting the parameter efficiency of adapters from the perspective of precision redundancy. In: Proceedings of the IEEE\/CVf international conference on computer vision, pp 17217\u201317226","DOI":"10.1109\/ICCV51070.2023.01579"},{"key":"11236_CR102","unstructured":"Kalla D, Smith N, Samaah F et\u00a0al (2023) Study and analysis of ChatGPT and its impact on different fields of study. Int J Innov Sci Res Technol 8(3):827"},{"key":"11236_CR103","unstructured":"Kaplan J, McCandlish S, Henighan T et\u00a0al (2020) Scaling laws for neural language models. arXiv preprint. arXiv:2001.08361"},{"key":"11236_CR104","first-page":"1022","volume":"34","author":"R Karimi Mahabadi","year":"2021","unstructured":"Karimi Mahabadi R, Henderson J, Ruder S (2021) Compacter: efficient low-rank hypercomplex adapter layers. Adv Neural Inf Process Syst 34:1022\u20131035","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR105","unstructured":"Keskar NS, McCann B, Xiong C et\u00a0al (2019) Unifying question answering, text classification, and regression via span extraction. arXiv preprint. arXiv:1904.09286"},{"key":"11236_CR106","doi-asserted-by":"crossref","unstructured":"Khashabi D, Min S, Khot T et\u00a0al (2020) UNIFIEDQA: crossing format boundaries with a single qa system. arXiv preprint. arXiv:2005.00700","DOI":"10.18653\/v1\/2020.findings-emnlp.171"},{"issue":"5","key":"11236_CR108","doi-asserted-by":"publisher","first-page":"598","DOI":"10.1016\/j.jpurol.2023.05.018","volume":"19","author":"JK Kim","year":"2023","unstructured":"Kim JK, Chua M, Rickard M et al (2023) Chatgpt and large language model (LLM) chatbots: The current state of acceptability and a proposal for guidelines on utilization in academic medicine. J Pediatr Urol 19(5):598\u2013604","journal-title":"J Pediatr Urol"},{"key":"11236_CR107","unstructured":"Kim J, Lee JH, Kim S et\u00a0al (2024) Memory-efficient fine-tuning of compressed large language models via sub-4-bit integer quantization. In: Advances in neural information processing systems, vol 36"},{"key":"11236_CR109","unstructured":"Knox WB, Stone P (2008) TAMER: training an agent manually via evaluative reinforcement. In: 2008 7th IEEE international conference on development and learning. IEEE, pp 292\u2013297"},{"key":"11236_CR110","unstructured":"Kocetkov D, Li R, Allal L et al (2022) The stack: 3 tb of permissively licensed source code. arXiv preprint. https:\/\/arxiv.org\/abs\/2211.15533"},{"key":"11236_CR111","first-page":"22199","volume":"35","author":"T Kojima","year":"2022","unstructured":"Kojima T, Gu SS, Reid M et al (2022) Large language models are zero-shot reasoners. Adv Neural Inf Process Syst 35:22199\u201322213","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR112","doi-asserted-by":"crossref","unstructured":"Lawton N, Kumar A, Thattai G et\u00a0al (2023) Neural architecture search for parameter-efficient fine-tuning of large pre-trained language models. arXiv preprint. arXiv:2305.16597","DOI":"10.18653\/v1\/2023.findings-acl.539"},{"key":"11236_CR113","unstructured":"Lee H, Phatale S, Mansoor H et\u00a0al (2023) RLAIF: scaling reinforcement learning from human feedback with ai feedback. arXiv preprint. arXiv:2309.00267"},{"key":"11236_CR114","doi-asserted-by":"crossref","unstructured":"Lee J, Stevens N, Han SC et\u00a0al (2024) A survey of large language models in finance (FINLLMS). arXiv preprint. arXiv:2402.02315","DOI":"10.1007\/s00521-024-10495-6"},{"key":"11236_CR115","unstructured":"Lei T, Bai J, Brahma S et\u00a0al (2024) Conditional adapters: parameter-efficient transfer learning with fast inference. In: Advances in neural information processing systems, vol 36"},{"key":"11236_CR116","doi-asserted-by":"crossref","unstructured":"Lester B, Al-Rfou R, Constant N (2021) The power of scale for parameter-efficient prompt tuning. arXiv preprint. arXiv:2104.08691","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"11236_CR120","doi-asserted-by":"crossref","unstructured":"Li S, Hoefler T (2021) Chimera: efficiently training large-scale neural networks with bidirectional pipelines. In: Proceedings of the international conference for high performance computing, networking, storage and analysis, pp 1\u201314","DOI":"10.1145\/3458817.3476145"},{"key":"11236_CR122","doi-asserted-by":"crossref","unstructured":"Li XL, Liang P (2021) Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint. arXiv:2101.00190","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"11236_CR121","unstructured":"Li X, Tramer F, Liang P et\u00a0al (2021) Large language models can be strong differentially private learners. arXiv preprint. arXiv:2110.05679"},{"key":"11236_CR118","unstructured":"Li J, Li D, Savarese S et\u00a0al (2023a) BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint. arXiv:2301.12597"},{"key":"11236_CR123","unstructured":"Li Y, Yu Y, Liang C et\u00a0al (2023b) LOFTQ: lora-fine-tuning-aware quantization for large language models. In: The Twelfth international conference on learning representations"},{"key":"11236_CR117","unstructured":"Li H, Chen J, Yang J et\u00a0al (2024a) LegalAgentBench: evaluating LLM agents in legal domain. arXiv preprint. arXiv:2412.17259"},{"issue":"9","key":"11236_CR119","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3649449","volume":"56","author":"J Li","year":"2024","unstructured":"Li J, Tang T, Zhao WX et al (2024b) Pre-trained language models for text generation: a survey. ACM Comput Surv 56(9):1\u201339","journal-title":"ACM Comput Surv"},{"key":"11236_CR124","unstructured":"Lialin V, Deshpande V, Rumshisky A (2023) Scaling down to scale up: a guide to parameter-efficient fine-tuning. arXiv preprint. arXiv:2303.15647"},{"key":"11236_CR125","first-page":"109","volume":"35","author":"D Lian","year":"2022","unstructured":"Lian D, Zhou D, Feng J et al (2022) Scaling & shifting your features: a new baseline for efficient model tuning. Adv Neural Inf Process Syst 35:109\u2013123","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR126","doi-asserted-by":"crossref","unstructured":"Liao B, Meng Y, Monz C (2023) Parameter-efficient fine-tuning without introducing new latency. arXiv preprint. arXiv:2305.16742","DOI":"10.18653\/v1\/2023.acl-long.233"},{"key":"11236_CR128","doi-asserted-by":"crossref","unstructured":"Lin Z, Madotto A, Fung P (2020) Exploring versatile generative language model via parameter-efficient transfer learning. arXiv preprint. arXiv:2004.03829","DOI":"10.18653\/v1\/2020.findings-emnlp.41"},{"key":"11236_CR127","doi-asserted-by":"crossref","unstructured":"Lin S, Hilton J, Evans O (2021) TRUTHFULQA: measuring how models mimic human falsehoods. arXiv preprint. arXiv:2109.07958","DOI":"10.18653\/v1\/2022.acl-long.229"},{"key":"11236_CR137","doi-asserted-by":"crossref","unstructured":"Liu X, He P, Chen W et\u00a0al (2019) Multi-task deep neural networks for natural language understanding. arXiv preprint. arXiv:1901.11504","DOI":"10.18653\/v1\/P19-1441"},{"key":"11236_CR138","doi-asserted-by":"crossref","unstructured":"Liu X, Ji K, Fu Y et\u00a0al (2021a) P-tuning v2: prompt tuning can be comparable to fine-tuning universally across scales and tasks. arXiv preprint. arXiv:2110.07602","DOI":"10.18653\/v1\/2022.acl-short.8"},{"key":"11236_CR139","unstructured":"Liu X, Zheng Y, Du Z et\u00a0al (2021b) GPT understands, too. arXiv preprint. arXiv:2103.10385"},{"key":"11236_CR142","first-page":"36889","volume":"35","author":"YC Liu","year":"2022","unstructured":"Liu YC, Ma CY, Tian J et al (2022a) POLYHISTOR: parameter-efficient multi-task adaptation for dense vision tasks. Adv Neural Inf Process Syst 35:36889\u201336901","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR140","doi-asserted-by":"crossref","unstructured":"Liu X, Sun T, Huang X et\u00a0al (2022b) Late prompt tuning: a late prompt could be better than many prompts. arXiv preprint. arXiv:2210.11292","DOI":"10.18653\/v1\/2022.findings-emnlp.95"},{"key":"11236_CR131","first-page":"1950","volume":"35","author":"H Liu","year":"2022","unstructured":"Liu H, Tam D, Muqeeth M et al (2022c) Few-shot parameter-efficient fine-tuning is better and cheaper than in-context learning. Adv Neural Inf Process Syst 35:1950\u20131965","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR132","doi-asserted-by":"crossref","unstructured":"Liu H, Li C, Li Y et\u00a0al (2023a) Improved baselines with visual instruction tuning. arXiv preprint. arXiv:2310.03744","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"11236_CR135","doi-asserted-by":"crossref","unstructured":"Liu Q, Wu X, Zhao X et\u00a0al (2023b) MOELORA: an MOE-based parameter efficient fine-tuning method for multi-task medical applications. arXiv preprint. arXiv:2310.18339","DOI":"10.1145\/3626772.3657722"},{"key":"11236_CR143","unstructured":"Liu Z, Feng R, Zhu K et\u00a0al (2023c) Cones: Concept neurons in diffusion models for customized generation. arXiv preprint. arXiv:2303.05125"},{"key":"11236_CR129","unstructured":"Liu A, Feng B, Wang B et\u00a0al (2024a) DEEPSEEK-V2: a strong, economical, and efficient mixture-of-experts language model. arXiv preprint. arXiv:2405.04434"},{"key":"11236_CR130","unstructured":"Liu A, Feng B, Xue B et\u00a0al (2024b) DEEPSEEK-V3 technical report. arXiv preprint. arXiv:2412.19437"},{"key":"11236_CR133","unstructured":"Liu H, Li C, Wu Q et\u00a0al (2024c) Visual instruction tuning. In: Advances in neural information processing systems, vol 36"},{"key":"11236_CR134","unstructured":"Liu J, Xiao G, Li K et\u00a0al (2024d) BITDELTA: your fine-tune may only be worth one bit. arXiv preprint. arXiv:2402.10193"},{"key":"11236_CR136","unstructured":"Liu SY, Wang CY, Yin H et\u00a0al (2024e) DORA: weight-decomposed low-rank adaptation. arXiv preprint. arXiv:2402.09353"},{"key":"11236_CR141","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1016\/j.aiopen.2023.08.012","volume":"5","author":"X Liu","year":"2024","unstructured":"Liu X, Zheng Y, Du Z et al (2024f) GPT understands, too. AI Open 5:208\u2013215","journal-title":"AI Open"},{"key":"11236_CR144","doi-asserted-by":"crossref","unstructured":"Liu Z, Kundu S, Li A et\u00a0al (2024g) AFLORA: adaptive freezing of low rank adaptation in parameter efficient fine-tuning of large models. arXiv preprint. arXiv:2403.13269","DOI":"10.18653\/v1\/2024.acl-short.16"},{"key":"11236_CR145","doi-asserted-by":"crossref","unstructured":"Lo K, Wang LL, Neumann M et\u00a0al (2020) S2ORC: the semantic scholar open research corpus. In: ACL. Association for Computational Linguistics, pp 4969\u20134983","DOI":"10.18653\/v1\/2020.acl-main.447"},{"key":"11236_CR146","doi-asserted-by":"crossref","unstructured":"Lu X, Brahman F, West P et\u00a0al (2023) Inference-time policy adapters (IPA): tailoring extreme-scale lms without fine-tuning. In: Proceedings of the 2023 conference on empirical methods in natural language processing, pp 6863\u20136883","DOI":"10.18653\/v1\/2023.emnlp-main.424"},{"key":"11236_CR147","doi-asserted-by":"crossref","unstructured":"Ma F, Zhang C, Ren L et\u00a0al (2022) XPROMPT: exploring the extreme of prompt tuning. arXiv preprint. arXiv:2210.04457","DOI":"10.18653\/v1\/2022.emnlp-main.758"},{"key":"11236_CR148","unstructured":"Mahabadi RK, Ruder S, Dehghani M et\u00a0al (2021) Parameter-efficient multi-task fine-tuning for transformers via shared hypernetworks. arXiv preprint. arXiv:2106.04489"},{"key":"11236_CR149","doi-asserted-by":"crossref","unstructured":"Mao Y, Mathias L, Hou R et\u00a0al (2021) UNIPELT: a unified framework for parameter-efficient language model tuning. arXiv preprint. arXiv:2110.07577","DOI":"10.18653\/v1\/2022.acl-long.433"},{"key":"11236_CR150","doi-asserted-by":"crossref","unstructured":"Marjit S, Singh H, Mathur N et\u00a0al (2024) DIFFUSEKRONA: a parameter efficient fine-tuning method for personalized diffusion model. arXiv preprint. arXiv:2402.17412","DOI":"10.1109\/WACV61041.2025.00348"},{"key":"11236_CR151","unstructured":"McCann B, Keskar NS, Xiong C et\u00a0al (2018) The natural language decathlon: multitask learning as question answering. arxiv preprint. arXiv:1806.08730"},{"key":"11236_CR152","unstructured":"Meng X, Dai D, Luo W et\u00a0al (2024) PERIODICLORA: breaking the low-rank bottleneck in lora optimization. arXiv preprint. arXiv:2402.16141"},{"key":"11236_CR153","doi-asserted-by":"crossref","unstructured":"Min S, Lewis M, Zettlemoyer L et\u00a0al (2021) METAICL: learning to learn in context. arXiv preprint. arXiv:2110.15943","DOI":"10.18653\/v1\/2022.naacl-main.201"},{"key":"11236_CR154","doi-asserted-by":"crossref","unstructured":"Mishra S, Khashabi D, Baral C et\u00a0al (2021) Cross-task generalization via natural language crowdsourcing instructions. arXiv preprint. arXiv:2104.08773","DOI":"10.18653\/v1\/2022.acl-long.244"},{"key":"11236_CR155","doi-asserted-by":"crossref","unstructured":"Mou C, Wang X, Xie L et\u00a0al (2023) T2I-ADAPTER: learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint. arXiv:2302.08453","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"11236_CR156","doi-asserted-by":"crossref","unstructured":"Muennighoff N, Wang T, Sutawika L et\u00a0al (2022) Crosslingual generalization through multitask finetuning. arXiv preprint. arXiv:2211.01786","DOI":"10.18653\/v1\/2023.acl-long.891"},{"key":"11236_CR157","unstructured":"Nakano R, Hilton J, Balaji S et\u00a0al (2021) WEBGPT: browser-assisted question-answering with human feedback. arXiv preprint. arXiv:2112.09332"},{"key":"11236_CR158","doi-asserted-by":"crossref","unstructured":"Nan L, Radev DR, Zhang R et\u00a0al (2021) DART: open-domain structured data record to text generation. In: NAACL-HLT. Association for Computational Linguistics, pp 432\u2013447","DOI":"10.18653\/v1\/2021.naacl-main.37"},{"key":"11236_CR159","doi-asserted-by":"crossref","unstructured":"Narayan S, Cohen SB, Lapata M (2018) Don\u2019t give me the details, just the summary! topic-aware convolutional neural networks for extreme summarization. In: EMNLP. Association for Computational Linguistics, pp 1797\u20131807","DOI":"10.18653\/v1\/D18-1206"},{"key":"11236_CR160","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang L, Wu J, Jiang X et al (2022) Training language models to follow instructions with human feedback. Adv Neural Inf Process Syst 35:27730\u201327744","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR161","unstructured":"Pan Z, Luo H, Li M et\u00a0al (2024) CONV-COA: improving open-domain question answering in large language models via conversational chain-of-action. arXiv preprint. arXiv:2405.17822"},{"key":"11236_CR162","doi-asserted-by":"crossref","unstructured":"Pfeiffer J, Kamath A, R\u00fcckl\u00e9 A et\u00a0al (2020) ADAPTERFUSION: non-destructive task composition for transfer learning. arXiv preprint. arXiv:2005.00247","DOI":"10.18653\/v1\/2021.eacl-main.39"},{"key":"11236_CR163","unstructured":"Qin Y, Wang X, Su Y et\u00a0al (2021) Exploring universal intrinsic task subspace via prompt tuning. arXiv preprint. arXiv:2110.07867"},{"key":"11236_CR164","unstructured":"Qiu Z, Liu W, Feng H et\u00a0al (2024) Controlling text-to-image diffusion by orthogonal finetuning. In: Advances in neural information processing systems, vol 36"},{"key":"11236_CR165","unstructured":"Radford A, Narasimhan K, Salimans T et\u00a0al (2018) Improving language understanding by generative pre-training. Technical Report, OpenAI"},{"issue":"8","key":"11236_CR166","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R et al (2019) Language models are unsupervised multitask learners. OpenAI Blog 1(8):9","journal-title":"OpenAI Blog"},{"key":"11236_CR167","unstructured":"Radford A, Kim JW, Hallacy C et\u00a0al (2021) Learning transferable visual models from natural language supervision. In: International conference on machine learning, PMLR, pp 8748\u20138763"},{"key":"11236_CR168","unstructured":"Rae JW, Potapenko A, Jayakumar SM et\u00a0al (2020) Compressive transformers for long-range sequence modelling. In: ICLR. OpenReview.net"},{"key":"11236_CR169","unstructured":"Rafailov R, Sharma A, Mitchell E et\u00a0al (2024) Direct preference optimization: your language model is secretly a reward model. In: Advances in neural information processing systems, vol 36"},{"issue":"140","key":"11236_CR170","first-page":"1","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel C, Shazeer N, Roberts A et al (2020) Exploring the limits of transfer learning with a unified text-to-text transformer. J Mach Learn Res 21(140):1\u201367","journal-title":"J Mach Learn Res"},{"key":"11236_CR171","doi-asserted-by":"crossref","unstructured":"Rajabzadeh H, Valipour M, Zhu T et\u00a0al (2024) QDYLORA: quantized dynamic low-rank adaptation for efficient large language model tuning. arXiv preprint. arXiv:2402.10462","DOI":"10.18653\/v1\/2024.emnlp-industry.53"},{"key":"11236_CR172","unstructured":"Rein D, Hou BL, Stickland AC et\u00a0al (2024) GPQA: a graduate-level google-proof q &a benchmark. In: First conference on language modeling"},{"key":"11236_CR173","first-page":"8583","volume":"34","author":"C Riquelme","year":"2021","unstructured":"Riquelme C, Puigcerver J, Mustafa B et al (2021) Scaling vision with sparse mixture of experts. Adv Neural Inf Process Syst 34:8583\u20138595","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR174","doi-asserted-by":"crossref","unstructured":"R\u00fcckl\u00e9 A, Geigle G, Glockner M et\u00a0al (2020) ADAPTERDROP: on the efficiency of adapters in transformers. arXiv preprint. arXiv:2010.11918","DOI":"10.18653\/v1\/2021.emnlp-main.626"},{"key":"11236_CR175","doi-asserted-by":"crossref","unstructured":"Ruiz N, Li Y, Jampani V et\u00a0al (2023) DREAMBOOTH: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 22500\u201322510","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"11236_CR176","unstructured":"Sanh V, Webson A, Raffel C et\u00a0al (2021) Multitask prompted training enables zero-shot task generalization. arXiv preprint. arXiv:2110.08207"},{"key":"11236_CR177","unstructured":"Saparov A, He H (2022) Language models are greedy reasoners: a systematic formal analysis of chain-of-thought. arXiv preprint. arXiv:2210.01240"},{"key":"11236_CR178","unstructured":"Schulman J, Wolski F, Dhariwal P et\u00a0al (2017) Proximal policy optimization algorithms. arXiv preprint. arXiv:1707.06347"},{"key":"11236_CR179","unstructured":"Shao Z, Wang P, Zhu Q et\u00a0al (2024) DEEPSEEKMATH: pushing the limits of mathematical reasoning in open language models. arXiv preprint. arXiv:2402.03300"},{"key":"11236_CR180","unstructured":"Shi Z, Lipani A (2023) DEPT: decomposed prompt tuning for parameter-efficient fine-tuning. arXiv preprint. arXiv:2309.05173"},{"issue":"7972","key":"11236_CR181","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"Singhal K, Azizi S, Tu T et al (2023) Large language models encode clinical knowledge. Nature 620(7972):172\u2013180","journal-title":"Nature"},{"key":"11236_CR182","unstructured":"Sprague Z, Ye X, Bostrom K et\u00a0al (2023) MUSR: testing the limits of chain-of-thought with multistep soft reasoning. arXiv preprint. arXiv:2310.16049"},{"key":"11236_CR183","unstructured":"Su Y, Wang X, Qin Y et\u00a0al (2021) On transferability of prompt tuning for natural language processing. arXiv preprint. arXiv:2111.06719"},{"key":"11236_CR184","unstructured":"Sun Q, Fang Y, Wu L et\u00a0al (2023) EVA-CLIP: improved training techniques for clip at scale. arXiv preprint. arXiv:2303.15389"},{"key":"11236_CR185","first-page":"24193","volume":"34","author":"YL Sung","year":"2021","unstructured":"Sung YL, Nair V, Raffel CA (2021) Training neural networks with fixed sparse masks. Adv Neural Inf Process Syst 34:24193\u201324205","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR186","first-page":"12991","volume":"35","author":"YL Sung","year":"2022","unstructured":"Sung YL, Cho J, Bansal M (2022) LST: ladder side-tuning for parameter and memory efficient transfer learning. Adv Neural Inf Process Syst 35:12991\u201313005","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR187","unstructured":"Sutton RS (1995) Generalization in reinforcement learning: Successful examples using sparse coarse coding. In: Advances in neural information processing systems, vol 8"},{"key":"11236_CR188","doi-asserted-by":"crossref","unstructured":"Suzgun M, Scales N, Sch\u00e4rli N et\u00a0al (2023) Challenging big-bench tasks and whether chain-of-thought can solve them. In: ACL (findings). Association for Computational Linguistics, pp 13003\u201313051","DOI":"10.18653\/v1\/2023.findings-acl.824"},{"key":"11236_CR189","unstructured":"Tang A, Shen L, Luo Y et\u00a0al (2023) Parameter efficient multi-task model fusion with partial linearization. arXiv preprint. arXiv:2310.04742"},{"key":"11236_CR190","doi-asserted-by":"crossref","unstructured":"Tay Y, Wei J, Chung HW et\u00a0al (2022) Transcending scaling laws with 0.1% extra compute. arXiv preprint. arXiv:2210.11399","DOI":"10.18653\/v1\/2023.emnlp-main.91"},{"key":"11236_CR193","doi-asserted-by":"crossref","unstructured":"Tian K, Mitchell E, Zhou A et\u00a0al (2023) Just ask for calibration: Strategies for eliciting calibrated confidence scores from language models fine-tuned with human feedback. arXiv preprint. arXiv:2305.14975","DOI":"10.18653\/v1\/2023.emnlp-main.330"},{"key":"11236_CR192","unstructured":"Tian C, Shi Z, Guo Z et\u00a0al (2024) HYDRALORA: an asymmetric lora architecture for efficient fine-tuning. arXiv preprint. arXiv:2404.19245"},{"key":"11236_CR194","unstructured":"Touvron H, Lavril T, Izacard G et\u00a0al (2023) Llama: Open and efficient foundation language models. arXiv preprint. arXiv:2302.13971"},{"key":"11236_CR195","doi-asserted-by":"crossref","unstructured":"Tu CH, Mai Z, Chao WL (2023) Visual query tuning: towards effective usage of intermediate representations for parameter and memory efficient transfer learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7725\u20137735","DOI":"10.1109\/CVPR52729.2023.00746"},{"key":"11236_CR196","doi-asserted-by":"crossref","unstructured":"Valipour M, Rezagholizadeh M, Kobyzev I et\u00a0al (2023) DYLORA: parameter-efficient tuning of pre-trained models using dynamic search-free low-rank adaptation. In: Proceedings of the 17th conference of the European chapter of the Association for Computational Linguistics, pp 3274\u20133287","DOI":"10.18653\/v1\/2023.eacl-main.239"},{"key":"11236_CR197","unstructured":"Vaswani A, Shazeer N, Parmar N et\u00a0al (2017) Attention is all you need. In: Advances in neural information processing systems, vol 30"},{"key":"11236_CR198","doi-asserted-by":"publisher","unstructured":"Vavekanand R, Sam K (2024) LLAMA 3.1: an in-depth analysis of the next-generation large language model. https:\/\/doi.org\/10.13140\/RG.2.2.10628.74882","DOI":"10.13140\/RG.2.2.10628.74882"},{"key":"11236_CR199","doi-asserted-by":"crossref","unstructured":"Voynov A, Aberman K, Cohen-Or D (2023a) Sketch-guided text-to-image diffusion models. In: ACM SIGGRAPH 2023 conference proceedings, pp 1\u201311","DOI":"10.1145\/3588432.3591560"},{"key":"11236_CR200","unstructured":"Voynov A, Chu Q, Cohen-Or D et\u00a0al (2023b) $$ p+ $$[CDATA[ p+ ]]: extended textual conditioning in text-to-image generation. arXiv preprint. arXiv:2303.09522"},{"key":"11236_CR201","doi-asserted-by":"crossref","unstructured":"Vu T, Lester B, Constant N et\u00a0al (2021) SPoT: better frozen model adaptation through soft prompt transfer. arXiv preprint. arXiv:2110.07904","DOI":"10.18653\/v1\/2022.acl-long.346"},{"key":"11236_CR202","doi-asserted-by":"crossref","unstructured":"Vu T, Lester B, Constant N et\u00a0al (2022) SPoT: better frozen model adaptation through soft prompt transfer. In: Proceedings of the 60th annual meeting of the association for computational linguistics (volume 1: long papers), pp 5039\u20135059","DOI":"10.18653\/v1\/2022.acl-long.346"},{"key":"11236_CR203","doi-asserted-by":"crossref","unstructured":"Vucetic D, Tayaranian M, Ziaeefard M et\u00a0al (2022) Efficient fine-tuning of bert models on the edge. In: 2022 IEEE international symposium on circuits and systems (ISCAS). IEEE, pp 1838\u20131842","DOI":"10.1109\/ISCAS48785.2022.9937567"},{"key":"11236_CR208","doi-asserted-by":"crossref","unstructured":"Wang J (2023) The power of ai-assisted diagnosis. EAI Endorsed Transactions on e-Learning 8(4)","DOI":"10.4108\/eetel.3772"},{"key":"11236_CR204","unstructured":"Wang A, Pruksachatkun Y, Nangia N et\u00a0al (2019a) SuperGLUE: a stickier benchmark for general-purpose language understanding systems. In: NeurIPS, pp 3261\u20133275"},{"key":"11236_CR205","doi-asserted-by":"crossref","unstructured":"Wang A, Singh A, Michael J et\u00a0al (2019b) GLUE: a multi-task benchmark and analysis platform for natural language understanding. In: ICLR (Poster). OpenReview.net","DOI":"10.18653\/v1\/W18-5446"},{"key":"11236_CR210","unstructured":"Wang P, Yang A, Men R et\u00a0al (2022a) OFA: unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework. In: International conference on machine learning, PMLR, pp 23318\u201323340"},{"key":"11236_CR214","doi-asserted-by":"crossref","unstructured":"Wang Y, Kordi Y, Mishra S et\u00a0al (2022b) Self-instruct: aligning language models with self-generated instructions. arXiv preprint. arXiv:2212.10560","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"11236_CR209","doi-asserted-by":"crossref","unstructured":"Wang L, Lyu C, Ji T et\u00a0al (2023a) Document-level machine translation with large language models. arXiv preprint. arXiv:2304.02210","DOI":"10.18653\/v1\/2023.emnlp-main.1036"},{"key":"11236_CR211","doi-asserted-by":"crossref","unstructured":"Wang Q, Mao Y, Wang J et\u00a0al (2023b) APROMPT: attention prompt tuning for efficient adaptation of pre-trained language models. In: Proceedings of the 2023 conference on empirical methods in natural language processing, pp 9147\u20139160","DOI":"10.18653\/v1\/2023.emnlp-main.567"},{"key":"11236_CR212","unstructured":"Wang W, Lv Q, Yu W et\u00a0al (2023c) COGVLM: visual expert for pretrained language models. arXiv preprint. arXiv:2311.03079"},{"key":"11236_CR213","unstructured":"Wang X, Hu Z, Lu P et\u00a0al (2023d) SCIBENCH: evaluating college-level scientific problem-solving abilities of large language models. arXiv preprint. arXiv:2307.10635"},{"key":"11236_CR215","unstructured":"Wang Y, Mishra S, Alipoormolabashi P et\u00a0al (2022c) Benchmarking generalization via in-context instructions on 1,600+ language tasks. arXiv preprint. arXiv:2204.07705 2"},{"key":"11236_CR216","doi-asserted-by":"crossref","unstructured":"Wang Y, Mukherjee S, Liu X et\u00a0al (2022d) Adamix: mixture-of-adapter for parameter-efficient tuning of large language models. arXiv preprint. arXiv:2205.12410 1(2):4","DOI":"10.18653\/v1\/2022.emnlp-main.388"},{"key":"11236_CR217","unstructured":"Wang Z, Panda R, Karlinsky L et\u00a0al (2022e) Multitask prompt tuning enables parameter-efficient transfer learning. In: The Eleventh international conference on learning representations"},{"key":"11236_CR206","unstructured":"Wang G, Cheng S, Zhan X et\u00a0al (2024a) OpenChat: advancing open-source language models with mixed-quality data. In: ICLR. OpenReview.net"},{"key":"11236_CR207","doi-asserted-by":"crossref","unstructured":"Wang H, Chang J, Zhai Y et\u00a0al (2024b) LION: implicit vision prompt tuning. In: Proceedings of the AAAI conference on artificial intelligence, pp 5372\u20135380","DOI":"10.1609\/aaai.v38i6.28345"},{"key":"11236_CR218","unstructured":"Wei J, Bosma M, Zhao VY et\u00a0al (2021) Finetuned language models are zero-shot learners. arXiv preprint. arXiv:2109.01652"},{"key":"11236_CR219","unstructured":"Wei J, Tay Y, Bommasani R et\u00a0al (2022) Emergent abilities of large language models. arXiv preprint. arXiv:2206.07682"},{"key":"11236_CR220","doi-asserted-by":"crossref","unstructured":"Wei X, Li G, Marculescu R (2024) ONLINE-LORA: task-free online continual learning via low rank adaptation. arXiv preprint. arXiv:2411.05663","DOI":"10.1109\/WACV61041.2025.00646"},{"key":"11236_CR225","doi-asserted-by":"crossref","unstructured":"Wu Z, Wang S, Gu J et\u00a0al (2022) IDPG: an instance-dependent prompt generation method. arXiv preprint. arXiv:2204.04497","DOI":"10.18653\/v1\/2022.naacl-main.403"},{"key":"11236_CR223","unstructured":"Wu S, Fei H, Qu L et\u00a0al (2023) NEXT-GPT: any-to-any multimodal llm. arXiv preprint. arXiv:2309.05519"},{"key":"11236_CR221","unstructured":"Wu J, Li X, Wei C et\u00a0al (2024a) Unleashing the power of visual prompting at the pixel level. In: TMLR"},{"key":"11236_CR222","unstructured":"Wu J, Yu T, Wang R et\u00a0al (2024b) Infoprompt: Information-theoretic soft prompt tuning for natural language understanding. In: Advances in neural information processing systems, vol 36"},{"key":"11236_CR224","doi-asserted-by":"crossref","unstructured":"Wu Y, Xiang Y, Huo S et\u00a0al (2024c) LORA-SP: streamlined partial parameter adaptation for resource-efficient fine-tuning of large language models. arXiv preprint. arXiv:2403.08822","DOI":"10.1117\/12.3032013"},{"key":"11236_CR226","doi-asserted-by":"crossref","unstructured":"Xie E, Yao L, Shi H et\u00a0al (2023) Difffit: Unlocking transferability of large diffusion models via simple parameter-efficient fine-tuning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 4230\u20134239","DOI":"10.1109\/ICCV51070.2023.00390"},{"key":"11236_CR227","unstructured":"Xin Y, Luo S, Zhou H et\u00a0al (2024) Parameter-efficient fine-tuning for pre-trained vision models: a survey. arXiv preprint. arXiv:2402.02242"},{"key":"11236_CR228","doi-asserted-by":"crossref","unstructured":"Xing F (2024) Designing heterogeneous llm agents for financial sentiment analysis. ACM Trans Manag Inf Syst 16(1):1\u201324","DOI":"10.1145\/3688399"},{"key":"11236_CR232","doi-asserted-by":"crossref","unstructured":"Xu S, Wen X (2024) Automatic design of adapter architectures for enhanced parameter-efficient fine-tuning. In: ICASSP 2024\u20132024 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 12536\u201312540","DOI":"10.1109\/ICASSP48485.2024.10446057"},{"key":"11236_CR231","doi-asserted-by":"crossref","unstructured":"Xu R, Luo F, Zhang Z et\u00a0al (2021) Raise a child in large language model: towards effective and generalizable fine-tuning. arXiv preprint. arXiv:2109.05687","DOI":"10.18653\/v1\/2021.emnlp-main.749"},{"key":"11236_CR229","unstructured":"Xu L, Xie H, Qin SZJ et\u00a0al (2023a) Parameter-efficient fine-tuning methods for pretrained language models: a critical review and assessment. arXiv preprint. arXiv:2312.12148"},{"key":"11236_CR230","doi-asserted-by":"crossref","unstructured":"Xu M, Zhang Z, Wei F et\u00a0al (2023b) Side adapter network for open-vocabulary semantic segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2945\u20132954","DOI":"10.1109\/CVPR52729.2023.00288"},{"key":"11236_CR233","unstructured":"Xu Y, Xie L, Gu X et\u00a0al (2023c) QA-LORA: quantization-aware low-rank adaptation of large language models. In: The Twelfth international conference on learning representations"},{"key":"11236_CR237","doi-asserted-by":"crossref","unstructured":"Yang Z, Qi P, Zhang S et\u00a0al (2018) HOTPOTQA: a dataset for diverse, explainable multi-hop question answering. In: EMNLP. Association for Computational Linguistics, pp 2369\u20132380","DOI":"10.18653\/v1\/D18-1259"},{"key":"11236_CR234","unstructured":"Yang AX, Robeyns M, Wang X et\u00a0al (2023a) Bayesian low-rank adaptation for large language models. In: The Twelfth international conference on learning representations"},{"key":"11236_CR236","doi-asserted-by":"crossref","unstructured":"Yang X, Huang JY, Zhou W et\u00a0al (2023b) Parameter-efficient tuning with special token adaptation. In: Proceedings of the 17th conference of the European chapter of the association for computational linguistics, pp 865\u2013872","DOI":"10.18653\/v1\/2023.eacl-main.60"},{"key":"11236_CR235","unstructured":"Yang B, Tian H, Ren J et\u00a0al (2024) Multi-objective fine-tuning for enhanced program repair with LLMS. arXiv preprint. arXiv:2404.12636"},{"key":"11236_CR238","doi-asserted-by":"crossref","unstructured":"Yao Y, Duan J, Xu K et\u00a0al (2024) A survey on large language model (LLM) security and privacy: the good, the bad, and the ugly. High-Confidence Computing p 100211","DOI":"10.1016\/j.hcc.2024.100211"},{"key":"11236_CR240","unstructured":"Ye S, Kim D, Jang J et\u00a0al (2022) Guess the instruction! making language models stronger zero-shot learners. arXiv preprint. arXiv:2210.02969"},{"key":"11236_CR239","unstructured":"Ye H, Zhang J, Liu S et\u00a0al (2023) IP-ADAPTER: text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint. arXiv:2308.06721"},{"key":"11236_CR241","unstructured":"Yeh SY, Hsieh YG, Gao Z et\u00a0al (2023) Navigating text-to-image customization: from lycoris fine-tuning to model evaluation. arXiv preprint. arXiv:2309.14859"},{"key":"11236_CR242","doi-asserted-by":"crossref","unstructured":"Yin D, Yang Y, Wang Z et\u00a0al (2023) 1% vs 100%: parameter-efficient low rank adapter for dense predictions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 20116\u201320126","DOI":"10.1109\/CVPR52729.2023.01926"},{"key":"11236_CR243","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/j.aiopen.2021.06.001","volume":"2","author":"S Yuan","year":"2021","unstructured":"Yuan S, Zhao H, Du Z et al (2021) Wudaocorpora: a super large-scale Chinese corpora for pre-training language models. AI Open 2:65\u201368","journal-title":"AI Open"},{"key":"11236_CR244","unstructured":"Zadouri T, \u00dcst\u00fcn A, Ahmadian A et\u00a0al (2023) Pushing mixture of experts to the limit: extremely parameter efficient moe for instruction tuning. arXiv preprint. arXiv:2309.05444"},{"key":"11236_CR245","unstructured":"Zaken EB, Ravfogel S, Goldberg Y (2021) Bitfit: simple parameter-efficient fine-tuning for transformer-based masked language-models. arXiv preprint. arXiv:2106.10199"},{"key":"11236_CR246","unstructured":"Zavras A, Michail D, Demir B et\u00a0al (2024) Mind the modality gap: towards a remote sensing vision-language model via cross-modal alignment. arXiv preprint. arXiv:2402.09816"},{"key":"11236_CR247","doi-asserted-by":"crossref","unstructured":"Zellers R, Holtzman A, Bisk Y et\u00a0al (2019) Hellaswag: can a machine really finish your sentence? In: Proceedings of the 57th annual meeting of the Association for Computational Linguistics","DOI":"10.18653\/v1\/P19-1472"},{"key":"11236_CR248","doi-asserted-by":"crossref","unstructured":"Zeng G, Zhang P, Lu W (2023) One network, many masks: towards more parameter-efficient transfer learning. arXiv preprint. arXiv:2305.17682","DOI":"10.18653\/v1\/2023.acl-long.418"},{"key":"11236_CR249","doi-asserted-by":"crossref","unstructured":"Zhai X, Kolesnikov A, Houlsby N et\u00a0al (2022) Scaling vision transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12104\u201312113","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"11236_CR252","doi-asserted-by":"crossref","unstructured":"Zhang H, Xu J, Wang J (2019) Pretraining-based natural language generation for text summarization. arXiv preprint. arXiv:1902.09243","DOI":"10.18653\/v1\/K19-1074"},{"key":"11236_CR259","unstructured":"Zhang Y, Zhou K, Liu Z (2022) Neural prompt search. arXiv preprint. arXiv:2206.04673"},{"key":"11236_CR250","doi-asserted-by":"crossref","unstructured":"Zhang B, Yang H, Zhou T et\u00a0al (2023a) Enhancing financial sentiment analysis via retrieval augmented large language models. In: Proceedings of the fourth ACM international conference on AI in finance, pp 349\u2013356","DOI":"10.1145\/3604237.3626866"},{"key":"11236_CR251","unstructured":"Zhang F, Li L, Chen J et\u00a0al (2023b) Increlora: Incremental parameter allocation method for parameter-efficient fine-tuning. arXiv preprint. arXiv:2308.12043"},{"key":"11236_CR253","doi-asserted-by":"crossref","unstructured":"Zhang L, Rao A, Agrawala M (2023c) Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3836\u20133847","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"11236_CR254","unstructured":"Zhang L, Zhang L, Shi S et\u00a0al (2023d) LORA-FA: memory-efficient low-rank adaptation for large language models fine-tuning. arXiv preprint. arXiv:2308.03303"},{"key":"11236_CR255","doi-asserted-by":"crossref","unstructured":"Zhang M, Shen C, Yang Z et\u00a0al (2023e) Pruning meets low-rank parameter-efficient fine-tuning. arXiv preprint. arXiv:2305.18403","DOI":"10.18653\/v1\/2024.findings-acl.178"},{"key":"11236_CR256","unstructured":"Zhang Q, Chen M, Bukharin A et\u00a0al (2023f) ADALORA: adaptive budget allocation for parameter-efficient fine-tuning. arXiv preprint. arXiv:2303.10512"},{"key":"11236_CR257","unstructured":"Zhang R, Han J, Liu C et\u00a0al (2023g) LLAMA-ADAPTER: efficient fine-tuning of language models with zero-init attention. arXiv preprint. arXiv:2303.16199"},{"key":"11236_CR258","unstructured":"Zhang X, Li C, Zong Y et\u00a0al (2023h) Evaluating the performance of large language models on gaokao benchmark. arXiv preprint. arXiv:2305.12474"},{"key":"11236_CR260","doi-asserted-by":"crossref","unstructured":"Zhang ZR, Tan C, Xu H et\u00a0al (2023i) Towards adaptive prefix tuning for parameter-efficient language model fine-tuning. arXiv preprint. arXiv:2305.15212","DOI":"10.18653\/v1\/2023.acl-short.107"},{"key":"11236_CR263","doi-asserted-by":"crossref","unstructured":"Zhao M, Lin T, Mi F et\u00a0al (2020) Masking as an efficient alternative to finetuning for pretrained language models. arXiv preprint. arXiv:2004.12406","DOI":"10.18653\/v1\/2020.emnlp-main.174"},{"key":"11236_CR261","doi-asserted-by":"crossref","unstructured":"Zhao H, Tan H, Mei H (2022) Tiny-attention adapter: contexts are more important than the number of parameters. arXiv preprint. arXiv:2211.01979","DOI":"10.18653\/v1\/2022.emnlp-main.444"},{"key":"11236_CR262","doi-asserted-by":"crossref","unstructured":"Zhao H, Fu J, He Z (2023) Prototype-based hyperadapter for sample-efficient multi-task tuning. arXiv preprint. arXiv:2310.11670","DOI":"10.18653\/v1\/2023.emnlp-main.280"},{"key":"11236_CR264","unstructured":"Zhao S, Chen D, Chen YC et\u00a0al (2024) UNI-CONTROLNET: all-in-one control to text-to-image diffusion models. In: Advances in neural information processing systems, vol 36"},{"key":"11236_CR265","first-page":"46595","volume":"36","author":"L Zheng","year":"2023","unstructured":"Zheng L, Chiang WL, Sheng Y et al (2023) Judging llm-as-a-judge with mt-bench and chatbot arena. Adv Neural Inf Process Syst 36:46595\u201346623","journal-title":"Adv Neural Inf Process Syst"},{"key":"11236_CR266","unstructured":"Zhong V, Xiong C, Socher R (2017) SEQ2SQL: generating structured queries from natural language using reinforcement learning. arXiv preprint. arXiv:1709.00103"},{"key":"11236_CR267","doi-asserted-by":"crossref","unstructured":"Zhong W, Cui R, Guo Y et\u00a0al (2024) AGIEVAL: a human-centric benchmark for evaluating foundation models. In: NAACL-HLT (Findings). Association for Computational Linguistics, pp 2299\u20132314","DOI":"10.18653\/v1\/2024.findings-naacl.149"},{"key":"11236_CR268","first-page":"525","volume":"12","author":"H Zhou","year":"2024","unstructured":"Zhou H, Wan X, Vuli\u0107 I et al (2024) AUTOPEFT: automatic configuration search for parameter-efficient fine-tuning. Trans Assoc Comput Ling 12:525\u2013542","journal-title":"Trans Assoc Comput Ling"},{"key":"11236_CR269","doi-asserted-by":"crossref","unstructured":"Zhu W, Tan M (2023) SPT: learning to selectively insert prompts for better prompt tuning. In: The 2023 conference on empirical methods in natural language processing","DOI":"10.18653\/v1\/2023.emnlp-main.727"},{"key":"11236_CR271","doi-asserted-by":"crossref","unstructured":"Zhu Y, Kiros R, Zemel RS et\u00a0al (2015) Aligning books and movies: towards story-like visual explanations by watching movies and reading books. In: ICCV. IEEE Computer Society, pp 19\u201327","DOI":"10.1109\/ICCV.2015.11"},{"key":"11236_CR272","doi-asserted-by":"crossref","unstructured":"Zhu Y, Feng J, Zhao C et\u00a0al (2021) Counter-interference adapter for multilingual machine translation. arXiv preprint. arXiv:2104.08154","DOI":"10.18653\/v1\/2021.findings-emnlp.240"},{"key":"11236_CR270","doi-asserted-by":"crossref","unstructured":"Zhu W, Liu H, Dong Q et\u00a0al (2023) Multilingual machine translation with large language models: empirical results and analysis. arXiv preprint. arXiv:2304.04675","DOI":"10.18653\/v1\/2024.findings-naacl.176"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11236-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-025-11236-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11236-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,23]],"date-time":"2025-06-23T06:35:06Z","timestamp":1750660506000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-025-11236-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,3]]},"references-count":272,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2025,8]]}},"alternative-id":["11236"],"URL":"https:\/\/doi.org\/10.1007\/s10462-025-11236-4","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-5393239\/v1","asserted-by":"object"}]},"ISSN":["1573-7462"],"issn-type":[{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,3]]},"assertion":[{"value":"10 April 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 May 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Yes, the authors have Conflict of interest as defined by Springer, or other interests that might be perceived to influence the results and\/or discussion reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"I confirm that I understand Artificial Intelligence Review is an open access journal that levies an article processing charge per articles accepted for publication. By submitting my article I agree to pay this charge in full if my article is accepted for publication.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Open Access"}},{"value":"The results\/data\/figures in this manuscript have not been published elsewhere, nor are they under consideration (from you or one of your Contributing Authors) by another publisher.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to publication"}},{"value":"All of the material is owned by the authors and\/or no permissions are required.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Third party material"}}],"article-number":"227"}}