{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T10:02:40Z","timestamp":1777888960312,"version":"3.51.4"},"reference-count":68,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"NSF of China","doi-asserted-by":"publisher","award":["62131003,62102034"],"award-info":[{"award-number":["62131003,62102034"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001459","name":"Ministry of Education, Singapore","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001459","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.00426","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"4475-4485","source":"Crossref","is-referenced-by-count":0,"title":["Zeroth-Order Fine-Tuning of LLMs in Random Subspaces"],"prefix":"10.1109","author":[{"given":"Ziming","family":"Yu","sequence":"first","affiliation":[{"name":"Beijing Normal University"}]},{"given":"Pan","family":"Zhou","sequence":"additional","affiliation":[{"name":"Singapore Management University"}]},{"given":"Sike","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Normal University"}]},{"given":"Jia","family":"Li","sequence":"additional","affiliation":[{"name":"Beijing Normal University"}]},{"given":"Mi","family":"Tian","sequence":"additional","affiliation":[{"name":"TAL Education Group"}]},{"given":"Hua","family":"Huang","sequence":"additional","affiliation":[{"name":"Beijing Normal University"}]}],"member":"263","reference":[{"key":"ref1","author":"Achiam","year":"2023","journal-title":"GPT-4 technical report"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.568"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/0925-2312(93)90006-O"},{"key":"ref4","article-title":"The second PASCAL recognising textual entailment challenge","volume-title":"Proceedings of the Second PASCAL Challenges Workshop on Recognising Textual Entailment","author":"Bar Haim","year":"2006"},{"key":"ref5","article-title":"The fifth PASCAL recognizing textual entailment challenge","volume-title":"Proceedings of the Second Text Analysis Conference","author":"Bentivogli","year":"2009"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1075"},{"key":"ref7","first-page":"1877","article-title":"Language models are few-shot learners","author":"Tom","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","author":"Chen","year":"2016","journal-title":"Training deep nets with sublinear memory cost"},{"key":"ref9","article-title":"Enhancing zeroth-order fine-tuning for language models with low-rank structures","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Chen","year":"2025"},{"key":"ref10","article-title":"BoolQ: Exploring the surprising difficulty of natural yes\/no questions","volume-title":"Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"Clark","year":"2019"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/11736790_9"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1189"},{"key":"ref13","article-title":"The commitmentbank: Investigating projection in naturally occurring discourse","volume-title":"Proceedings of Sinn und Bedeutung 23","author":"De Marneffe","year":"2019"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.52202\/068431-2198"},{"key":"ref15","article-title":"8-bit optimizers via block-wise quantization","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Dettmers","year":"2022"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0441"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00626-4"},{"key":"ref18","first-page":"2368","article-title":"DROP: A reading comprehension benchmark requiring discrete reasoning over paragraphs","volume-title":"Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"Dua","year":"2019"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2015.2409256"},{"key":"ref20","article-title":"Variance-reduced zeroth-order methods for fine-tuning language models","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Gautam","year":"2024"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.3115\/1654536.1654538"},{"key":"ref22","author":"Guo","year":"2024","journal-title":"Zeroth-order fine-tuning of LLMs with extreme sparsity"},{"key":"ref23","article-title":"Flora: Lowrank adapters are secretly gradient compressors","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Hao","year":"2024"},{"key":"ref24","article-title":"LoRA: Low-rank adaptation of large language models","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Hu","year":"2022"},{"key":"ref25","article-title":"Query complexity of derivative-free optimization","author":"Jamieson","year":"2012","journal-title":"Advances in Neural Information Processing Systems, 25"},{"key":"ref26","first-page":"3100","article-title":"Improved zeroth-order variance reduced algorithms and analysis for nonconvex optimization","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Ji","year":"2019"},{"key":"ref27","author":"Albert","year":"2023","journal-title":"Mistral 7B"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i16.29796"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1023"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.3269"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10589-021-00271-w"},{"key":"ref32","article-title":"Fine-tuning can distort pretrained features and underperform out-of-distribution","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Kumar","year":"2022"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref34","article-title":"The winograd schema challenge","volume-title":"Proceedings of the International Conference on the Principles of Knowledge Representation and Reasoning","author":"Levesque","year":"2012"},{"key":"ref35","article-title":"Memory efficient optimizers with 4-bit states","author":"Li","year":"2024","journal-title":"Advances in Neural Information Processing Systems, 36"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acllong.353"},{"key":"ref37","article-title":"Zeroth-order stochastic variance reduction for nonconvex optimization","author":"Liu","year":"2018","journal-title":"Advances in Neural Information Processing Systems, 31"},{"key":"ref38","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","journal-title":"arXiv:1907.11692"},{"key":"ref39","author":"Liu","year":"2024","journal-title":"Sparse MeZO: Less parameters for better performance in zeroth-order LLM fine-tuning"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2308"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-015-9296-2"},{"key":"ref42","first-page":"512","article-title":"What is being transferred in transfer learning?","volume":"33","author":"Neyshabur","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/s10957-024-02561-9"},{"key":"ref44","first-page":"1267","article-title":"WiC: the word-in-context dataset for evaluating contextsensitive meaning representations","volume-title":"Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"Pilehvar","year":"2019"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d16-1264"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1137\/22M1488569"},{"key":"ref47","article-title":"Choice of plausible alternatives: An evaluation of commonsense causal reasoning","volume-title":"Proceedings of the AAAI Spring Symposium Series","author":"Roemmele","year":"2011"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3474381"},{"key":"ref49","first-page":"44759","article-title":"Tag-LLM: Repurposing general-purpose LLMs for specialized domains","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Shen","year":"2024"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D13-1170"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D13-1170"},{"key":"ref52","author":"Solaiman","year":"2019","journal-title":"Release strategies and the social impacts of language models"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/9.119632"},{"key":"ref54","first-page":"3299","article-title":"meProp: Sparsified back propagation for accelerated deep learning with reduced overfitting","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Sun","year":"2017"},{"key":"ref55","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv preprint"},{"key":"ref56","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref57","article-title":"SuperGLUE: A stickier benchmark for generalpurpose language understanding systems","author":"Wang","journal-title":"arXiv: 1905.00537, 2019. 7"},{"key":"ref58","author":"Wang","year":"2024","journal-title":"MLAE: Masked LoRA experts for visual parameter-efficient finetuning"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1101"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.56"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00760"},{"key":"ref62","first-page":"4429","article-title":"Zeroth-order optimization with weak dimension dependency","volume-title":"Proceedings of the Annual Conference on Learning Theory","author":"Yue","year":"2023"},{"key":"ref63","article-title":"ReCoRD: Bridging the gap between human and machine commonsense reading comprehension","author":"Zhang","year":"2018","journal-title":"arXiv preprint"},{"key":"ref64","author":"Zhang","year":"2022","journal-title":"OPT: Open pre-trained transformer language models"},{"key":"ref65","first-page":"59173","article-title":"Revisiting zeroth-order optimization for memoryefficient LLM fine-tuning: A benchmark","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Zhang","year":"2024"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.95"},{"key":"ref67","article-title":"Galore: Memoryefficient LLM training by gradient low-rank projection","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Zhao","year":"2024"},{"key":"ref68","article-title":"Second-order fine-tuning without pain for LMMs: A Hessian informed zeroth-order optimizer","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Zhao","year":"2025"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11445906.pdf?arnumber=11445906","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:03:34Z","timestamp":1777611814000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11445906\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":68,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.00426","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}