{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:09:18Z","timestamp":1761894558434,"version":"build-2065373602"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/icme59968.2025.11209076","type":"proceedings-article","created":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T17:57:42Z","timestamp":1761847062000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["TACOS: Open Tagging and Comparative Scoring for Instruction Fine-Tuning Data Selection"],"prefix":"10.1109","author":[{"given":"Xixiang","family":"He","sequence":"first","affiliation":[{"name":"National University of Defense Technology,Changsha,China"}]},{"given":"Hao","family":"Yu","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Lah,Beijing,China"}]},{"given":"Qiyao","family":"Sun","sequence":"additional","affiliation":[{"name":"National University of Defense Technology,Changsha,China"}]},{"given":"Ao","family":"Cheng","sequence":"additional","affiliation":[{"name":"National University of Defense Technology,Changsha,China"}]},{"given":"Tailai","family":"Zhang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology,Changsha,China"}]},{"given":"Cong","family":"Liu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology,Changsha,China"}]},{"given":"Shuxuan","family":"Guo","sequence":"additional","affiliation":[{"name":"Intelligent Game and Decision Lah,Beijing,China"}]}],"member":"263","reference":[{"article-title":"Finetuned language models are zero-shot learners","volume-title":"ICLR","author":"Wei","key":"ref1"},{"article-title":"LIMA: less is more for alignment","volume-title":"NeurIPS","author":"Zhou","key":"ref2"},{"article-title":"Alpagasus: Training a better alpaca with fewer data","volume-title":"ICLR","author":"Chen","key":"ref3"},{"article-title":"Improving translation faithfulness of large language models via augmenting instructions","year":"2023","author":"Chen","key":"ref4"},{"article-title":"What makes good data for alignment? a comprehensive study of automatic data selection in instruction tuning","year":"2023","author":"Liu","key":"ref5"},{"article-title":"Long is more for alignment: A simple but tough-to-beat baseline for instruction fine-tuning","year":"2024","author":"Zhao","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.421"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.679"},{"article-title":"Mods: Model-oriented data selection for instruction tuning","year":"2023","author":"Du","key":"ref9"},{"article-title":"Orca: Progressive learning from complex explanation traces of gpt-4","year":"2023","author":"Mukherjee","key":"ref10"},{"article-title":"A preliminary study of the intrinsic relationship between complexity and alignment","year":"2023","author":"Zhao","key":"ref11"},{"article-title":"The flan collection: Designing data and methods for effective instruction tuning","volume-title":"ICML","author":"Longpre","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1363"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.574"},{"article-title":"Instruction mining: High-quality instruction data selection for large language models","year":"2023","author":"Cao","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-srw.6"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s10506-024-09401-1"},{"article-title":"Training language models to follow instructions with human feedback","volume-title":"NeurIPS","author":"Ouyang","key":"ref18"},{"article-title":"Understanding the effects of rlhf on llm generalisation and diversity","year":"2023","author":"Kirk","key":"ref19"},{"article-title":"Stanford alpaca: An instruction-following llama model","year":"2023","author":"Taori","key":"ref20"},{"article-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality","year":"2023","author":"Chiang","key":"ref21"},{"journal-title":"arXiv:2406.12793, 2024","article-title":"Chatglm: A family of large language models from glm-130b to glm-4 all tools","author":"GLM","key":"ref22"},{"article-title":"Llama 2: Open foundation and fine-tuned chat models","year":"2023","author":"Touvron","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.340"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.112"},{"article-title":"# instag: Instruction tagging for analyzing supervised fine-tuning of large language models","volume-title":"ICLR","author":"Lu","key":"ref26"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/335191.335372"},{"article-title":"Maybe only 0.5% data is needed: A preliminary exploration of low training data instruction tuning","year":"2023","author":"Chen","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.846"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CSCI.2017.252"},{"article-title":"Self-evolved diverse data sampling for efficient instruction tuning","year":"2023","author":"Wu","key":"ref31"},{"article-title":"Gpt-4 technical report","year":"2023","author":"Achiam","key":"ref32"},{"article-title":"Mistral 7b","year":"2023","author":"Jiang","key":"ref33"},{"article-title":"Wizardlm: Empowering large language models to follow complex instructions","year":"2023","author":"Xu","key":"ref34"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-demos.38"},{"article-title":"Koala: A dialogue model for academic research","year":"2023","author":"Geng","key":"ref36"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"ref38","article-title":"Judging llm-as-a-judge with mt-bench and chatbot arena","author":"Zheng","year":"2024","journal-title":"NeurIPS"},{"article-title":"Neftune: Noisy embeddings improve instruction finetuning","volume-title":"ICLR","author":"Jain","key":"ref39"},{"article-title":"Camels in a changing climate: Enhancing lm adaptation with tulu 2","year":"2023","author":"Ivison","key":"ref40"},{"key":"ref41","article-title":"ROUGE: A package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"ACL"},{"key":"ref42","doi-asserted-by":"crossref","DOI":"10.3115\/1073083.1073135","article-title":"Bleu: a method for automatic evaluation of machine translation","volume-title":"ACL","author":"Papineni"}],"event":{"name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2025,6,30]]},"location":"Nantes, France","end":{"date-parts":[[2025,7,4]]}},"container-title":["2025 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11208895\/11208897\/11209076.pdf?arnumber=11209076","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T05:30:39Z","timestamp":1761888639000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11209076\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/icme59968.2025.11209076","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}