{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T03:15:11Z","timestamp":1769397311601,"version":"3.49.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/ijcnn64981.2025.11227906","type":"proceedings-article","created":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T18:46:15Z","timestamp":1763145975000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Data-efficient LLM Fine-tuning for Code Generation"],"prefix":"10.1109","author":[{"given":"Weijie","family":"Lv","sequence":"first","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics,Nanjing,China"}]},{"given":"Xuan","family":"Xia","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Artificial Intelligence and Robotics for Society,Shenzhen,China"}]},{"given":"Sheng-Jun","family":"Huang","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics,Nanjing,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Evaluating large language models trained on code","author":"Chen","year":"2021"},{"key":"ref2","article-title":"Code llama: Open foundation models for code","author":"Roziere","year":"2023"},{"key":"ref3","article-title":"Deepseek-coder: When the large language model meets programming\u2013the rise of code intelligence","author":"Guo","year":"2024"},{"key":"ref4","article-title":"Starcoder 2 and the stack v2: The next generation","author":"Lozhkov","year":"2024"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1108\/ws.2000.07949fab.004"},{"key":"ref6","article-title":"When scaling meets llm finetuning: The effect of data, model and finetuning method","author":"Zhang","year":"2024"},{"key":"ref7","article-title":"Code alpaca: An instruction-following llama model for code generation","author":"Chaudhary","year":"2023"},{"key":"ref8","article-title":"Wizardcoder: Empowering code large language models with evol-instruct","volume-title":"The Twelfth International Conference on Learning Representations","author":"Luo"},{"key":"ref9","article-title":"Magicoder: Source code is all you need","author":"Wei","year":"2023"},{"key":"ref10","article-title":"Wavecoder: Widespread and versatile enhanced instruction tuning with refined data generation","author":"Yu","year":"2023"},{"key":"ref11","article-title":"Alchemistcoder: Harmonizing and eliciting code capability by hindsight tuning on multi-source data","author":"Song","year":"2024"},{"key":"ref12","article-title":"Autocoder: Enhancing code large language model with\\textsc {AIEV-Instruct}","author":"Lei","year":"2024"},{"key":"ref13","article-title":"Gpt-4 technical 
report","author":"Achiam","year":"2023"},{"key":"ref14","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref15","article-title":"Less: Selecting influential data for targeted instruction tuning","author":"Xia","year":"2024"},{"key":"ref16","article-title":"Lima: Less is more for alignment","volume":"36","author":"Zhou","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.421"},{"key":"ref18","article-title":"Codegemma: Open code models based on gemma","author":"Team","year":"2024"},{"key":"ref19","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"ref21","article-title":"Alpagasus: Training a better alpaca with fewer data","volume-title":"The Twelfth International Conference on Learning Representations","author":"Chen"},{"key":"ref22","article-title":"#instag: Instruction tagging for analyzing supervised fine-tuning of large language models","volume-title":"The Twelfth International Conference on Learning Representations","author":"Lu"},{"key":"ref23","article-title":"Rethinking the instruction quality: Lift is what you need","author":"Xu","year":"2023"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.195"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.37"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01578"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.648"},{"key":"ref28","article-title":"Shed: Shapley-based automated dataset refinement for instruction fine-tuning","author":"He","year":"2024"},{"key":"ref29","first-page":"722","article-title":"Submodular combinatorial information measures with applications in machine learning","volume-title":"Algorithmic Learning Theory.","author":"Iyer","year":"2021"},{"key":"ref30","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/D19-1410","article-title":"Sentence-bert: Sentence embeddings using siamese bert-networks","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing","author":"Reimers"},{"issue":"140","key":"ref31","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"Journal of machine learning research"},{"key":"ref32","article-title":"Evaluating large language models trained on code","author":"Chen","year":"2021"},{"key":"ref33","article-title":"Is your code generated by chatGPT really correct? 
"article-title":"Is your code generated by chatGPT really correct? rigorous evaluation of large language models for code generation","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems","author":"Liu"},{"key":"ref34","article-title":"Program synthesis with large language models","author":"Austin","year":"2021"},{"key":"ref35","article-title":"Teaching large language models to self-debug","volume-title":"The Twelfth International Conference on Learning Representations","author":"Chen"},{"key":"ref36","article-title":"Decoupled weight decay regularization","volume-title":"International Conference on Learning Representations","author":"Loshchilov"}],"event":{"name":"2025 International Joint Conference on Neural Networks (IJCNN)","location":"Rome, Italy","start":{"date-parts":[[2025,6,30]]},"end":{"date-parts":[[2025,7,5]]}},"container-title":["2025 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11227166\/11227148\/11227906.pdf?arnumber=11227906","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T07:11:30Z","timestamp":1763190690000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11227906\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/ijcnn64981.2025.11227906","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}