{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:16:19Z","timestamp":1765307779555,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","funder":[{"name":"Strategic Priority Research Program of Chinese Academy of Sciences","award":["XDA0480200"],"award-info":[{"award-number":["XDA0480200"]}]},{"name":"National Natural Science Foundations of China","award":["62306310"],"award-info":[{"award-number":["62306310"]}]},{"name":"Beijing Natural Science Foundation","award":["L242093"],"award-info":[{"award-number":["L242093"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755520","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:44:48Z","timestamp":1761371088000},"page":"4619-4628","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["EvoVLMA: Evolutionary Vision-Language Model Adaptation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2256-8815","authenticated-orcid":false,"given":"Kun","family":"Ding","sequence":"first","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1385-3224","authenticated-orcid":false,"given":"Ying","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2089-9733","authenticated-orcid":false,"given":"Shiming","family":"Xiang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"FunBO: Discovering Acquisition Functions for Bayesian Optimization with FunSearch. CoRR","author":"Aglietti Virginia","year":"2024","unstructured":"Virginia Aglietti, Ira Ktena, Jessica Schrouff, Eleni Sgouritsa, Francisco J. R. Ruiz, Alan Malek, Alexis Bellot, and Silvia Chiappa. 2024. FunBO: Discovering Acquisition Functions for Bayesian Optimization with FunSearch. CoRR, Vol. abs\/2406.04824 (2024)."},{"key":"e_1_3_2_1_2_1","first-page":"2623","article-title":"Optuna","author":"Akiba Takuya","year":"2019","unstructured":"Takuya Akiba, Shotaro Sano, Toshihiko Yanase, Takeru Ohta, and Masanori Koyama. 2019. Optuna: A Next-generation Hyperparameter Optimization Framework. In ACM SIGKDD. 2623-2631.","journal-title":"A Next-generation Hyperparameter Optimization Framework. In ACM SIGKDD."},{"key":"e_1_3_2_1_3_1","first-page":"6709","article-title":"ResearchAgent: Iterative Research Idea Generation over Scientific Literature with Large Language Models","author":"Baek Jinheon","year":"2025","unstructured":"Jinheon Baek, Sujay Kumar Jauhar, Silviu Cucerzan, and Sung Ju Hwang. 2025. ResearchAgent: Iterative Research Idea Generation over Scientific Literature with Large Language Models. In NAACL-HLT. 6709-6738.","journal-title":"NAACL-HLT."},{"key":"e_1_3_2_1_4_1","first-page":"9448","article-title":"ObjectNet: A large-scale bias-controlled dataset for pushing the limits of object recognition models","author":"Barbu Andrei","year":"2019","unstructured":"Andrei Barbu, David Mayo, Julian Alverio, William Luo, Christopher Wang, Dan Gutfreund, Josh Tenenbaum, and Boris Katz. 2019. ObjectNet: A large-scale bias-controlled dataset for pushing the limits of object recognition models. In NeurIPS. 9448-9458.","journal-title":"NeurIPS."},{"key":"e_1_3_2_1_5_1","first-page":"446","article-title":"Food-101 - Mining Discriminative Components with Random Forests","author":"Bossard Lukas","year":"2014","unstructured":"Lukas Bossard, Matthieu Guillaumin, and Luc Van Gool. 2014. Food-101 - Mining Discriminative Components with Random Forests. In ECCV. 446-461.","journal-title":"ECCV."},{"key":"e_1_3_2_1_6_1","first-page":"7787","article-title":"EvoPrompting","author":"Chen Angelica","year":"2023","unstructured":"Angelica Chen, David Dohan, and David R. So. 2023a. EvoPrompting: Language Models for Code-Level Neural Architecture Search. In NeurIPS. 7787-7817.","journal-title":"Language Models for Code-Level Neural Architecture Search. In NeurIPS."},{"key":"e_1_3_2_1_7_1","first-page":"699","article-title":"Exploring Open-Vocabulary Semantic Segmentation from CLIP Vision Encoder Distillation Only","author":"Chen Jun","year":"2023","unstructured":"Jun Chen, Deyao Zhu, Guocheng Qian, Bernard Ghanem, Zhicheng Yan, Chenchen Zhu, Fanyi Xiao, Sean Chang Culatana, and Mohamed Elhoseiny. 2023c. Exploring Open-Vocabulary Semantic Segmentation from CLIP Vision Encoder Distillation Only. In ICCV. 699-710.","journal-title":"ICCV."},{"key":"e_1_3_2_1_8_1","first-page":"1597","article-title":"A Simple Framework for Contrastive Learning of Visual Representations","volume":"119","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey E. Hinton. 2020. A Simple Framework for Contrastive Learning of Visual Representations. In ICML, Vol. 119. 1597-1607.","journal-title":"ICML"},{"key":"e_1_3_2_1_9_1","unstructured":"Zhe Chen Yuchen Duan Wenhai Wang Junjun He Tong Lu Jifeng Dai and Yu Qiao. 2023b. Vision Transformer Adapter for Dense Predictions. In ICLR. 20 pages."},{"key":"e_1_3_2_1_10_1","first-page":"3606","article-title":"Describing Textures in the Wild","author":"Cimpoi Mircea","year":"2014","unstructured":"Mircea Cimpoi, Subhransu Maji, Iasonas Kokkinos, Sammy Mohamed, and Andrea Vedaldi. 2014. Describing Textures in the Wild. In CVPR. 3606-3613.","journal-title":"CVPR."},{"key":"e_1_3_2_1_11_1","unstructured":"DeepSeek-AI Aixin Liu Bei Feng and et al. 2024. DeepSeek-V3 Technical Report. CoRR Vol. abs\/2412.19437 (2024)."},{"key":"e_1_3_2_1_12_1","first-page":"7480","article-title":"Scaling Vision Transformers to 22 Billion Parameters","volume":"202","author":"Dehghani Mostafa","year":"2023","unstructured":"Mostafa Dehghani, Josip Djolonga, Basil Mustafa, and et al., 2023. Scaling Vision Transformers to 22 Billion Parameters. In ICML, Vol. 202. 7480-7512.","journal-title":"ICML"},{"key":"e_1_3_2_1_13_1","first-page":"248","article-title":"ImageNet: A large-scale hierarchical image database","author":"Deng Jia","year":"2009","unstructured":"Jia Deng, Wei Dong, Richard Socher, Li-Jia Li, Kai Li, and Li Fei-Fei. 2009. ImageNet: A large-scale hierarchical image database. In CVPR. 248-255.","journal-title":"CVPR."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.128290"},{"key":"e_1_3_2_1_15_1","volume-title":"Calibrated Cache Model for Few-Shot Vision-Language Model Adaptation. CoRR","author":"Ding Kun","year":"2024","unstructured":"Kun Ding, Qiang Yu, Haojian Zhang, Gaofeng Meng, and Shiming Xiang. 2024b. Calibrated Cache Model for Few-Shot Vision-Language Model Adaptation. CoRR, Vol. abs\/2410.08895 (2024)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i2.27918"},{"key":"e_1_3_2_1_17_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly Jakob Uszkoreit and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In ICLR. 21 pages."},{"key":"e_1_3_2_1_18_1","first-page":"17445","article-title":"E(2,)VPT","author":"Han Cheng","year":"2023","unstructured":"Cheng Han, Qifan Wang, Yiming Cui, Zhiwen Cao, Wenguan Wang, Siyuan Qi, and Dongfang Liu. 2023. E(2,)VPT: An Effective and Efficient Approach for Visual Prompt Tuning. In ICCV. 17445-17456.","journal-title":"In ICCV."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.swevo.2024.101741"},{"key":"e_1_3_2_1_20_1","first-page":"15979","article-title":"Masked Autoencoders Are Scalable Vision Learners","author":"He Kaiming","year":"2022","unstructured":"Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Doll\u00e1r, and Ross B. Girshick. 2022. Masked Autoencoders Are Scalable Vision Learners. In CVPR. 15979-15988.","journal-title":"CVPR."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2019.2918242"},{"key":"e_1_3_2_1_22_1","first-page":"9118","volume-title":"ICML","volume":"162","author":"Huang Wenlong","year":"2022","unstructured":"Wenlong Huang, Pieter Abbeel, Deepak Pathak, and Igor Mordatch. 2022. Language Models as Zero-Shot Planners: Extracting Actionable Knowledge for Embodied Agents. In ICML, Vol. 162. 9118-9147."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25178"},{"key":"e_1_3_2_1_24_1","first-page":"10540","article-title":"LLM Performance Predictors are good initializers for Architecture Search","author":"Jawahar Ganesh","year":"2024","unstructured":"Ganesh Jawahar, Muhammad Abdul-Mageed, Laks V. S. Lakshmanan, and Dujian Ding. 2024. LLM Performance Predictors are good initializers for Architecture Search. In ACL. 10540-10560.","journal-title":"ACL."},{"key":"e_1_3_2_1_25_1","first-page":"4904","volume-title":"ICML","volume":"139","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc V. Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling Up Visual and Vision-Language Representation Learning With Noisy Text Supervision. In ICML, Vol. 139. 4904-4916."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"e_1_3_2_1_27_1","volume-title":"A survey on large language models for code generation. CoRR","author":"Jiang Juyong","year":"2024","unstructured":"Juyong Jiang, Fan Wang, Jiasi Shen, Sungju Kim, and Sunghun Kim. 2024. A survey on large language models for code generation. CoRR, Vol. abs\/2406.00515 (2024)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA240489"},{"key":"e_1_3_2_1_29_1","first-page":"554","article-title":"3D Object Representations for Fine-Grained Categorization","author":"Krause Jonathan","year":"2013","unstructured":"Jonathan Krause, Michael Stark, Jia Deng, and Li Fei-Fei. 2013. 3D Object Representations for Fine-Grained Categorization. In ICCVW. 554-561.","journal-title":"ICCVW."},{"key":"e_1_3_2_1_30_1","unstructured":"Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2005.09.012"},{"key":"e_1_3_2_1_32_1","first-page":"3613","article-title":"ChatCite: LLM Agent with Human Workflow Guidance for Comparative Literature Summary","author":"Li Yutong","year":"2025","unstructured":"Yutong Li, Lu Chen, Aiwei Liu, Kai Yu, and Lijie Wen. 2025. ChatCite: LLM Agent with Human Workflow Guidance for Comparative Literature Summary. In COLING. 3613-3630.","journal-title":"COLING."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-023-10131-7"},{"key":"e_1_3_2_1_34_1","first-page":"32201","article-title":"Evolution of Heuristics","author":"Liu Fei","year":"2024","unstructured":"Fei Liu, Xialiang Tong, Mingxuan Yuan, Xi Lin, Fu Luo, Zhenkun Wang, Zhichao Lu, and Qingfu Zhang. 2024b. Evolution of Heuristics: Towards Efficient Automatic Algorithm Design Using Large Language Model. In ICML. 32201-32223.","journal-title":"In ICML."},{"key":"e_1_3_2_1_35_1","volume-title":"A Systematic Survey on Large Language Models for Algorithm Design. CoRR","author":"Liu Fei","year":"2024","unstructured":"Fei Liu, Yiming Yao, Ping Guo, Zhiyuan Yang, Zhe Zhao, Xi Lin, Xialiang Tong, Mingxuan Yuan, Zhichao Lu, Zhenkun Wang, and Qingfu Zhang. 2024c. A Systematic Survey on Large Language Models for Algorithm Design. CoRR, Vol. abs\/2410.14716 (2024)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CEC60901.2024.10611913"},{"key":"e_1_3_2_1_37_1","first-page":"86528","article-title":"Discovering preference optimization algorithms with and for large language models","volume":"37","author":"Lu Chris","year":"2024","unstructured":"Chris Lu, Samuel Holt, Claudio Fanconi, Alex Chan, Jakob Foerster, Mihaela van der Schaar, and Robert Lange. 2024. Discovering preference optimization algorithms with and for large language models. NeurIPS, Vol. 37 (2024), 86528-86573.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_38_1","volume-title":"Agent Reviewers: Domain-specific Multimodal Agents with Shared Memory for Paper Review. In ICML.","author":"Lu Kai","year":"2025","unstructured":"Kai Lu, Shixiong Xu, Jinqiu Li, Kun Ding, and Gaofeng Meng. 2025. Agent Reviewers: Domain-specific Multimodal Agents with Shared Memory for Paper Review. In ICML."},{"key":"e_1_3_2_1_39_1","volume-title":"LLM4SR: A Survey on Large Language Models for Scientific Research. CoRR","author":"Luo Ziming","year":"2025","unstructured":"Ziming Luo, Zonglin Yang, Zexin Xu, Wei Yang, and Xinya Du. 2025. LLM4SR: A Survey on Large Language Models for Scientific Research. CoRR, Vol. abs\/2501.04306 (2025)."},{"key":"e_1_3_2_1_40_1","volume-title":"Fine-Grained Visual Classification of Aircraft. CoRR","author":"Maji Subhransu","year":"2013","unstructured":"Subhransu Maji, Esa Rahtu, Juho Kannala, Matthew B. Blaschko, and Andrea Vedaldi. 2013. Fine-Grained Visual Classification of Aircraft. CoRR, Vol. abs\/1306.5151 (2013)."},{"key":"e_1_3_2_1_41_1","volume-title":"Identify Critical Nodes in Complex Network with Large Language Models. CoRR","author":"Mao Jinzhu","year":"2024","unstructured":"Jinzhu Mao, Dongyun Zou, Li Sheng, Siyi Liu, Chen Gao, Yue Wang, and Yong Li. 2024. Identify Critical Nodes in Complex Network with Large Language Models. CoRR, Vol. abs\/2403.03962 (2024)."},{"key":"e_1_3_2_1_42_1","first-page":"722","article-title":"Automated Flower Classification over a Large Number of Classes","author":"Nilsback Maria-Elena","year":"2008","unstructured":"Maria-Elena Nilsback and Andrew Zisserman. 2008. Automated Flower Classification over a Large Number of Classes. In ICVGIP. 722-729.","journal-title":"ICVGIP."},{"key":"e_1_3_2_1_43_1","article-title":"DINOv2: Learning Robust Visual Features without","volume":"2024","author":"Oquab Maxime","year":"2024","unstructured":"Maxime Oquab, Timoth\u00e9e Darcet, Th\u00e9o Moutakanni, and et al., 2024. DINOv2: Learning Robust Visual Features without Supervision. Trans. Mach. Learn. Res., Vol. 2024 (2024).","journal-title":"Supervision. Trans. Mach. Learn. Res."},{"key":"e_1_3_2_1_44_1","first-page":"3498","article-title":"Cats and dogs","author":"Parkhi Omkar M.","year":"2012","unstructured":"Omkar M. Parkhi, Andrea Vedaldi, Andrew Zisserman, and C. V. Jawahar. 2012. Cats and dogs. In CVPR. 3498-3505.","journal-title":"CVPR."},{"key":"e_1_3_2_1_45_1","first-page":"8748","volume-title":"ICML","volume":"139","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In ICML, Vol. 139. 8748-8763."},{"key":"e_1_3_2_1_46_1","first-page":"18061","article-title":"DenseCLIP","author":"Rao Yongming","year":"2022","unstructured":"Yongming Rao, Wenliang Zhao, Guangyi Chen, Yansong Tang, Zheng Zhu, Guan Huang, Jie Zhou, and Jiwen Lu. 2022. DenseCLIP: Language-Guided Dense Prediction with Context-Aware Prompting. In CVPR. 18061-18070.","journal-title":"Language-Guided Dense Prediction with Context-Aware Prompting. In CVPR."},{"key":"e_1_3_2_1_47_1","first-page":"5389","article-title":"Do ImageNet Classifiers Generalize to ImageNet?","author":"Recht Benjamin","year":"2019","unstructured":"Benjamin Recht, Rebecca Roelofs, Ludwig Schmidt, and Vaishaal Shankar. 2019. Do ImageNet Classifiers Generalize to ImageNet?. In ICML. 5389-5400.","journal-title":"ICML."},{"key":"e_1_3_2_1_48_1","first-page":"468","volume-title":"Nature","volume":"625","author":"Romera-Paredes Bernardino","year":"2024","unstructured":"Bernardino Romera-Paredes, Mohammadamin Barekatain, Alexander Novikov, Matej Balog, M. Pawan Kumar, Emilien Dupont, Francisco J. R. Ruiz, Jordan S. Ellenberg, Pengming Wang, Omar Fawzi, Pushmeet Kohli, and Alhussein Fawzi. 2024. Mathematical discoveries from program search with large language models. Nature, Vol. 625, 7995 (2024), 468-475."},{"key":"e_1_3_2_1_49_1","first-page":"39597","article-title":"Interpreting and Analysing CLIP's Zero-Shot Image Classification via Mutual Knowledge","volume":"37","author":"Sammani Fawaz","year":"2024","unstructured":"Fawaz Sammani and Nikos Deligiannis. 2024. Interpreting and Analysing CLIP's Zero-Shot Image Classification via Mutual Knowledge. In NeurIPS, Vol. 37. 39597-39631.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_50_1","first-page":"2683","article-title":"Navigation with Large Language Models: Semantic Guesswork as a Heuristic for Planning","volume":"229","author":"Shah Dhruv","year":"2023","unstructured":"Dhruv Shah, Michael Robert Equi, Blazej Osinski, Fei Xia, Brian Ichter, and Sergey Levine. 2023. Navigation with Large Language Models: Semantic Guesswork as a Heuristic for Planning. In CoRL, Vol. 229. 2683-2699.","journal-title":"CoRL"},{"key":"e_1_3_2_1_51_1","first-page":"15617","article-title":"FLAVA","author":"Singh Amanpreet","year":"2022","unstructured":"Amanpreet Singh, Ronghang Hu, Vedanuj Goswami, Guillaume Couairon, Wojciech Galuba, Marcus Rohrbach, and Douwe Kiela. 2022. FLAVA: A Foundational Language And Vision Alignment Model. In CVPR. 15617-15629.","journal-title":"A Foundational Language And Vision Alignment Model. In CVPR."},{"key":"e_1_3_2_1_52_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012","unstructured":"Khurram Soomro, Amir Roshan Zamir, and Mubarak Shah. 2012. UCF101: A Dataset of 101 Human Actions Classes From Videos in The Wild. CoRR, Vol. abs\/1212.0402 (2012)."},{"key":"e_1_3_2_1_53_1","unstructured":"Patara Trirat Wonyong Jeong and Sung Ju Hwang. 2025. AutoML-Agent: A Multi-Agent LLM Framework for Full-Pipeline AutoML. In ICML."},{"key":"e_1_3_2_1_54_1","first-page":"11006","article-title":"CLIPPO: Image-and-Language Understanding from Pixels Only","author":"Tschannen Michael","year":"2023","unstructured":"Michael Tschannen, Basil Mustafa, and Neil Houlsby. 2023. CLIPPO: Image-and-Language Understanding from Pixels Only. In CVPR. 11006-11017.","journal-title":"CVPR."},{"key":"e_1_3_2_1_55_1","volume-title":"LLaMEA: A large language model evolutionary algorithm for automatically generating metaheuristics","author":"van Stein Niki","year":"2024","unstructured":"Niki van Stein and Thomas B\u00e4ck. 2024. LLaMEA: A large language model evolutionary algorithm for automatically generating metaheuristics. IEEE Transactions on Evolutionary Computation (2024)."},{"key":"e_1_3_2_1_56_1","first-page":"10506","article-title":"Learning Robust Global Representations by Penalizing Local Predictive Power","author":"Wang Haohan","year":"2019","unstructured":"Haohan Wang, Songwei Ge, Zachary C. Lipton, and Eric P. Xing. 2019. Learning Robust Global Representations by Penalizing Local Predictive Power. In NeurIPS. 10506-10518.","journal-title":"NeurIPS."},{"key":"e_1_3_2_1_57_1","unstructured":"Zhengbo Wang Jian Liang Lijun Sheng Ran He Zilei Wang and Tieniu Tan. 2024. A Hard-to-Beat Baseline for Training-free CLIP-based Adaptation. In ICLR."},{"key":"e_1_3_2_1_58_1","volume-title":"Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms. CoRR","author":"Xiao Han","year":"2017","unstructured":"Han Xiao, Kashif Rasul, and Roland Vollgraf. 2017. Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms. CoRR, Vol. abs\/1708.07747 (2017)."},{"key":"e_1_3_2_1_59_1","first-page":"3485","article-title":"SUN database: Large-scale scene recognition from abbey to zoo","author":"Xiao Jianxiong","year":"2010","unstructured":"Jianxiong Xiao, James Hays, Krista A. Ehinger, Aude Oliva, and Antonio Torralba. 2010. SUN database: Large-scale scene recognition from abbey to zoo. In CVPR. 3485-3492.","journal-title":"CVPR."},{"key":"e_1_3_2_1_60_1","volume-title":"Large language models as optimizers. CoRR","author":"Yang Chengrun","year":"2023","unstructured":"Chengrun Yang, Xuezhi Wang, Yifeng Lu, Hanxiao Liu, Quoc V Le, Denny Zhou, and Xinyun Chen. 2023. Large language models as optimizers. CoRR, Vol. abs\/2309.03409 (2023)."},{"key":"e_1_3_2_1_61_1","first-page":"6757","article-title":"Visual-Language Prompt Tuning with Knowledge-Guided Context Optimization","author":"Yao Hantao","year":"2023","unstructured":"Hantao Yao, Rui Zhang, and Changsheng Xu. 2023. Visual-Language Prompt Tuning with Knowledge-Guided Context Optimization. In CVPR. 6757-6767.","journal-title":"CVPR."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-70068-2_23"},{"key":"e_1_3_2_1_63_1","first-page":"43571","article-title":"ReEvo: Large Language Models as Hyper-Heuristics with Reflective Evolution","volume":"37","author":"Ye Haoran","year":"2024","unstructured":"Haoran Ye, Jiarui Wang, Zhiguang Cao, Federico Berto, Chuanbo Hua, Haeyeon Kim, Jinkyoo Park, and Guojie Song. 2024. ReEvo: Large Language Models as Hyper-Heuristics with Reflective Evolution. NeurIPS, Vol. 37 (2024), 43571-43608.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_64_1","article-title":"CoCa: Contrastive Captioners are Image-Text Foundation","volume":"2022","author":"Yu Jiahui","year":"2022","unstructured":"Jiahui Yu, Zirui Wang, Vijay Vasudevan, Legg Yeung, Mojtaba Seyedhosseini, and Yonghui Wu. 2022. CoCa: Contrastive Captioners are Image-Text Foundation Models. Trans. Mach. Learn. Res., Vol. 2022 (2022).","journal-title":"Models. Trans. Mach. Learn. Res."},{"key":"e_1_3_2_1_65_1","first-page":"58329","article-title":"tnGPS: discovering unknown tensor network structure search algorithms via large language models (LLMs)","author":"Zeng Junhua","year":"2024","unstructured":"Junhua Zeng, Chao Li, Zhun Sun, Qibin Zhao, and Guoxu Zhou. 2024. tnGPS: discovering unknown tensor network structure search algorithms via large language models (LLMs). In ICML. 58329-58347.","journal-title":"ICML."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_29"},{"key":"e_1_3_2_1_67_1","volume-title":"Automl-gpt: Automatic machine learning with gpt. CoRR","author":"Zhang Shujian","year":"2023","unstructured":"Shujian Zhang, Chengyue Gong, Lemeng Wu, Xingchao Liu, and Mingyuan Zhou. 2023. Automl-gpt: Automatic machine learning with gpt. CoRR, Vol. abs\/2305.02499 (2023)."},{"key":"e_1_3_2_1_68_1","volume-title":"How Can LLM Guide RL? A Value-Based Approach. CoRR","author":"Zhang Shenao","year":"2024","unstructured":"Shenao Zhang, Sirui Zheng, Shuqi Ke, Zhihan Liu, Wanxin Jin, Jianbo Yuan, Yingxiang Yang, Hongxia Yang, and Zhaoran Wang. 2024. How Can LLM Guide RL? A Value-Based Approach. CoRR, Vol. abs\/2402.16181 (2024)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"e_1_3_2_1_70_1","first-page":"2605","article-title":"Not All Features Matter","author":"Zhu Xiangyang","year":"2023","unstructured":"Xiangyang Zhu, Renrui Zhang, Bowei He, Aojun Zhou, Dong Wang, Bin Zhao, and Peng Gao. 2023. Not All Features Matter: Enhancing Few-shot CLIP with Adaptive Prior Refinement. In ICCV. 2605-2615.","journal-title":"Enhancing Few-shot CLIP with Adaptive Prior Refinement. In ICCV."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755520","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:14:30Z","timestamp":1765307670000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755520"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":70,"alternative-id":["10.1145\/3746027.3755520","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755520","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}