{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T21:15:36Z","timestamp":1771362936318,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","funder":[{"name":"Natural Science Foundation of Chongqing Municipality","award":["CSTB2024NSCQ-MSX0033"],"award-info":[{"award-number":["CSTB2024NSCQ-MSX0033"]}]},{"name":"Science and Technology Research Program of Chongqing Municipal Education Commission","award":["KJQN202300828"],"award-info":[{"award-number":["KJQN202300828"]}]},{"name":"Science and Technology Research Program of Chongqing Municipal Education Commission","award":["KJQN202500801"],"award-info":[{"award-number":["KJQN202500801"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,7]]},"DOI":"10.1145\/3779153.3779172","type":"proceedings-article","created":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T07:35:51Z","timestamp":1769499351000},"page":"124-129","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["GNP-FILTER: Gradient-Norm Proxy-Based Data Selection for Visual Instruction Tuning in Vison Language Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4667-3243","authenticated-orcid":false,"given":"Changjiu","family":"Jiang","sequence":"first","affiliation":[{"name":"The Artificial Intelligence College, Chongqing Key Laboratory of IntelliSense and Blockchain Technology, Chongqing Technology and Business University, Chongqing, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3704-4595","authenticated-orcid":false,"given":"Bo","family":"Liu","sequence":"additional","affiliation":[{"name":"The Artificial Intelligence College, Chongqing Key Laboratory of IntelliSense and Blockchain Technology, Chongqing Technology and Business University, Chongqing, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1555-0577","authenticated-orcid":false,"given":"Jie","family":"Li","sequence":"additional","affiliation":[{"name":"The Artificial Intelligence College, Chongqing Key Laboratory of IntelliSense and Blockchain Technology, Chongqing Technology and Business University, Chongqing, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7677-5908","authenticated-orcid":false,"given":"Jinrui","family":"Qian","sequence":"additional","affiliation":[{"name":"The Artificial Intelligence College, Chongqing Key Laboratory of IntelliSense and Blockchain Technology, Chongqing Technology and Business University, Chongqing, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8258-0107","authenticated-orcid":false,"given":"Run","family":"Zeng","sequence":"additional","affiliation":[{"name":"The Artificial Intelligence College, Chongqing Key Laboratory of IntelliSense and Blockchain Technology, Chongqing Technology and Business University, Chongqing, Chongqing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4987-1773","authenticated-orcid":false,"given":"Xin","family":"Huang","sequence":"additional","affiliation":[{"name":"The Artificial Intelligence College, Chongqing Key Laboratory of IntelliSense and Blockchain Technology, Chongqing Technology and Business University, Chongqing, Chongqing, China"}]}],"member":"320","published-online":{"date-parts":[[2026,1,26]]},"reference":[{"key":"e_1_3_3_1_1_2","volume-title":"Advances in Neural Information Processing Systems 36 (NeurIPS","author":"Liu H","year":"2023","unstructured":"Liu H, Li C, Wu Q, Lee YJ. Visual Instruction Tuning. In: Advances in Neural Information Processing Systems 36 (NeurIPS 2023)."},{"key":"e_1_3_3_1_2_2","volume-title":"National Science Review","author":"Yin","year":"2024","unstructured":"S. Yin et al., \u2018A Survey on Multimodal Large Language Models\u2019, National Science Review, p. nwae403, Nov. 2024"},{"key":"e_1_3_3_1_3_2","volume-title":"A survey on data selection for language models[J]. arXiv preprint arXiv:2402.16827","author":"Albalak A","year":"2024","unstructured":"Albalak A, Elazar Y, Xie S M, et al. A survey on data selection for language models[J]. arXiv preprint arXiv:2402.16827, 2024."},{"key":"e_1_3_3_1_4_2","unstructured":"Paul Mansheej Surya Ganguli and Gintare Karolina Dziugaite. \"Deep learning on a data diet: Finding important examples early in training.\"\u00a0Advances in neural information processing systems\u00a034 (2021): 20596-20607."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Chen Ruibo et al. \"Your vision-language model itself is a strong filter: Towards high-quality instruction tuning with data selection.\"\u00a0arXiv preprint arXiv:2402.12501\u00a0(2024).","DOI":"10.18653\/v1\/2024.findings-acl.246"},{"key":"e_1_3_3_1_6_2","unstructured":"Xia Mengzhou et al. \"Less: Selecting influential data for targeted instruction tuning.\"\u00a0arXiv preprint arXiv:2402.04333\u00a0(2024)."},{"key":"e_1_3_3_1_7_2","unstructured":"Li Ming et al. \"From quantity to quality: Boosting llm performance with self-guided data selection for instruction tuning.\"\u00a0arXiv preprint arXiv:2308.12032\u00a0(2023)."},{"key":"e_1_3_3_1_8_2","unstructured":"Liu Liangxin et al. \"Selectit: Selective instruction tuning for large language models via uncertainty-aware self-reflection.\"\u00a0arXiv preprint arXiv:2402.16705\u00a0(2024)."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Song Jielin et al. \"IterSelectTune: An Iterative Training Framework for Efficient Instruction-Tuning Data Selection.\"\u00a0arXiv preprint arXiv:2410.13464\u00a0(2024).","DOI":"10.1007\/978-981-95-0014-7_28"},{"key":"e_1_3_3_1_10_2","unstructured":"Yin Mingjia et al. \"Entropy law: The story behind data compression and llm performance.\"\u00a0arXiv preprint arXiv:2407.06645\u00a0(2024)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Liu Zikang et al. \"Less is More: High-value Data Selection for Visual Instruction Tuning.\"\u00a0arXiv preprint arXiv:2403.09559\u00a0(2024).","DOI":"10.1145\/3746027.3755160"},{"key":"e_1_3_3_1_12_2","volume-title":"Improved baselines with visual instruction tuning.\"\u00a0Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Liu","year":"2024","unstructured":"Liu, Haotian, et al. \"Improved baselines with visual instruction tuning.\"\u00a0Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2024."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Hessel Jack et al. \"Clipscore: A reference-free evaluation metric for image captioning.\"\u00a0arXiv preprint arXiv:2104.08718\u00a0(2021).","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"e_1_3_3_1_14_2","unstructured":"Marion Max et al. \"When less is more: Investigating data pruning for pretraining llms at scale.\"\u00a0arXiv preprint arXiv:2309.04564\u00a0(2023)."},{"key":"e_1_3_3_1_15_2","unstructured":"Abbas Amro et al. \"Semdedup: Data-efficient learning at web-scale through semantic deduplication.\"\u00a0arXiv preprint arXiv:2303.09540\u00a0(2023)."},{"key":"e_1_3_3_1_16_2","volume-title":"PmLR","author":"Radford","year":"2021","unstructured":"Radford, Alec, et al. \"Learning transferable visual models from natural language supervision.\"\u00a0International conference on machine learning. PmLR, 2021."},{"key":"e_1_3_3_1_17_2","volume-title":"Making the v in vqa matter: Elevating the role of image understanding in visual question answering.\"\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Goyal","year":"2017","unstructured":"Goyal, Yash, et al. \"Making the v in vqa matter: Elevating the role of image understanding in visual question answering.\"\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition. 2017."},{"key":"e_1_3_3_1_18_2","unstructured":"Lu Pan et al. \"Learn to explain: Multimodal reasoning via thought chains for science question answering.\"\u00a0Advances in Neural Information Processing Systems\u00a035 (2022): 2507-2521."},{"key":"e_1_3_3_1_19_2","volume-title":"Towards vqa models that can read.\"\u00a0Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"Singh","year":"2019","unstructured":"Singh, Amanpreet, et al. \"Towards vqa models that can read.\"\u00a0Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2019."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Yin Shukang et al. \"A survey on multimodal large language models.\"\u00a0National Science Review\u00a011.12 (2024): nwae403.","DOI":"10.1093\/nsr\/nwae403"},{"key":"e_1_3_3_1_21_2","volume-title":"Mmbench: Is your multi-modal model an all-around player?.\"\u00a0European conference on computer vision","author":"Liu","year":"2024","unstructured":"Liu, Yuan, et al. \"Mmbench: Is your multi-modal model an all-around player?.\"\u00a0European conference on computer vision. Cham: Springer Nature Switzerland, 2024."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Li Yifan et al. \"Evaluating object hallucination in large vision-language models.\"\u00a0arXiv preprint arXiv:2305.10355\u00a0(2023).","DOI":"10.18653\/v1\/2023.emnlp-main.20"},{"key":"e_1_3_3_1_23_2","volume-title":"Vizwiz grand challenge: Answering visual questions from blind people.\"\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition","author":"Gurari","year":"2018","unstructured":"Gurari, Danna, et al. \"Vizwiz grand challenge: Answering visual questions from blind people.\"\u00a0Proceedings of the IEEE conference on computer vision and pattern recognition. 2018."}],"event":{"name":"BDIOT 2025: 2025 9th International Conference on Big Data and Internet of Things","location":"Chongqing China","acronym":"BDIOT 2025"},"container-title":["Proceedings of the 2025 9th International Conference on Big Data and Internet of Things"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3779153.3779172","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T20:30:56Z","timestamp":1771360256000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3779153.3779172"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,7]]},"references-count":23,"alternative-id":["10.1145\/3779153.3779172","10.1145\/3779153"],"URL":"https:\/\/doi.org\/10.1145\/3779153.3779172","relation":{},"subject":[],"published":{"date-parts":[[2025,11,7]]},"assertion":[{"value":"2026-01-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}