{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T14:44:52Z","timestamp":1779201892797,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Australian Research Council under Discovery Early Career Researcher Award","award":["DE220101075"],"award-info":[{"award-number":["DE220101075"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681213","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"8604-8612","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Towards Robustness Prompt Tuning with Fully Test-Time Adaptation for CLIP's Zero-Shot Generalization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8397-3410","authenticated-orcid":false,"given":"Ran","family":"Wang","sequence":"first","affiliation":[{"name":"Australian Artificial Intelligence Institute, FEIT, University of Technology Sydney, Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9122-0775","authenticated-orcid":false,"given":"Hua","family":"Zuo","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute, FEIT, University of Technology Sydney, Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0602-6255","authenticated-orcid":false,"given":"Zhen","family":"Fang","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute, FEIT, University of Technology Sydney, Sydney, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0690-4732","authenticated-orcid":false,"given":"Jie","family":"Lu","sequence":"additional","affiliation":[{"name":"Australian Artificial Intelligence Institute, FEIT, University of Technology Sydney, Sydney, Australia"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"e_1_3_2_1_2_1","volume-title":"PLOT: Prompt Learning with Optimal Transport for Vision- Language Models. In ICLR.","author":"Chen Guangyi","year":"2023","unstructured":"Guangyi Chen, Weiran Yao, Xiangchen Song, Xinyue Li, Yongming Rao, and Kun Zhang. 2023. PLOT: Prompt Learning with Optimal Transport for Vision- Language Models. In ICLR."},{"key":"e_1_3_2_1_3_1","volume-title":"Improved Test-Time Adaptation for Domain Generalization. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023","author":"Chen Liang","year":"2023","unstructured":"Liang Chen, Yong Zhang, Yibing Song, Ying Shan, and Lingqiao Liu. 2023. Improved Test-Time Adaptation for Domain Generalization. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17--24, 2023. IEEE, 24172--24182."},{"key":"e_1_3_2_1_4_1","volume-title":"Describing Textures in the Wild","author":"Cimpoi Mircea","unstructured":"Mircea Cimpoi, Subhransu Maji, Iasonas Kokkinos, Sammy Mohamed, and Andrea Vedaldi. 2014. Describing Textures in the Wild. In CVPR. IEEE Computer Society, 3606--3613."},{"key":"e_1_3_2_1_5_1","volume-title":"ImageNet: A large-scale hierarchical image database","author":"Deng Jia","unstructured":"Jia Deng, Wei Dong, Richard Socher, Li-Jia Li, Kai Li, and Li Fei-Fei. 2009. ImageNet: A large-scale hierarchical image database. In CVPR. IEEE Computer Society, 248--255."},{"key":"e_1_3_2_1_6_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT. Association for Computational Linguistics, 4171--4186."},{"key":"e_1_3_2_1_7_1","volume-title":"OpenPrompt: An Open-source Framework for Promptlearning","author":"Ding Ning","unstructured":"Ning Ding, Shengding Hu, Weilin Zhao, Yulin Chen, Zhiyuan Liu, Haitao Zheng, and Maosong Sun. 2022. OpenPrompt: An Open-source Framework for Promptlearning. In ACL. Association for Computational Linguistics, 105--113."},{"key":"e_1_3_2_1_8_1","volume-title":"Robust Mean Teacher for Continual and Gradual Test-Time Adaptation","author":"D\u00f6bler Mario","unstructured":"Mario D\u00f6bler, Robert A. Marsden, and Bin Yang. 2023. Robust Mean Teacher for Continual and Gradual Test-Time Adaptation. In CVPR. IEEE, 7704--7714."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2005.09.012"},{"key":"e_1_3_2_1_10_1","volume-title":"CLOOB: Modern Hopfield Networks with InfoLOOB Outperform CLIP. In NeurIPS.","author":"F\u00fcrst Andreas","year":"2022","unstructured":"Andreas F\u00fcrst, Elisabeth Rumetshofer, Johannes Lehner, Viet T. Tran, Fei Tang, Hubert Ramsauer, David P. Kreil, Michael Kopp, G\u00fcnter Klambauer, Angela Bitto, and Sepp Hochreiter. 2022. CLOOB: Modern Hopfield Networks with InfoLOOB Outperform CLIP. In NeurIPS."},{"key":"e_1_3_2_1_11_1","unstructured":"Xiuye Gu Tsung-Yi Lin Weicheng Kuo and Yin Cui. 2022. Open-vocabulary Object Detection via Vision and Language Knowledge Distillation. In ICLR. Open- Review.net."},{"key":"e_1_3_2_1_12_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arXiv:1512.03385 [cs.CV]"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2019.2918242"},{"key":"e_1_3_2_1_14_1","volume-title":"The Many Faces of Robustness: A Critical Analysis of Out-of-Distribution Generalization","author":"Hendrycks Dan","unstructured":"Dan Hendrycks, Steven Basart, Norman Mu, Saurav Kadavath, FrankWang, Evan Dorundo, Rahul Desai, Tyler Zhu, Samyak Parajuli, Mike Guo, Dawn Song, Jacob Steinhardt, and Justin Gilmer. 2021. The Many Faces of Robustness: A Critical Analysis of Out-of-Distribution Generalization. In ICCV. IEEE, 8320--8329."},{"key":"e_1_3_2_1_15_1","volume-title":"Dietterich","author":"Hendrycks Dan","year":"2019","unstructured":"Dan Hendrycks and Thomas G. Dietterich. 2019. Benchmarking Neural Network Robustness to Common Corruptions and Perturbations. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_16_1","volume-title":"ICML (Proceedings of Machine Learning Research","volume":"4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc V. Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling Up Visual and Vision-Language Representation Learning With Noisy Text Supervision. In ICML (Proceedings of Machine Learning Research, Vol. 139). PMLR, 4904--4916."},{"key":"e_1_3_2_1_17_1","volume-title":"Muhammad Maaz, Salman Khan, and Fahad Shahbaz Khan.","author":"Khattak Muhammad Uzair","year":"2022","unstructured":"Muhammad Uzair Khattak, Hanoona Abdul Rasheed, Muhammad Maaz, Salman Khan, and Fahad Shahbaz Khan. 2022. MaPLe: Multi-modal Prompt Learning. CoRR abs\/2210.03117 (2022). arXiv:2210.03117"},{"key":"e_1_3_2_1_18_1","volume-title":"Muzammal Naseer, Salman H. Khan, Ming-Hsuan Yang, and Fahad Shahbaz Khan.","author":"Khattak Muhammad Uzair","year":"2023","unstructured":"Muhammad Uzair Khattak, Syed Talal Wasim, Muzammal Naseer, Salman H. Khan, Ming-Hsuan Yang, and Fahad Shahbaz Khan. 2023. Self-regulating Prompts: Foundational Model Adaptation without Forgetting. CoRR abs\/2307.06948 (2023). arXiv:2307.06948"},{"key":"e_1_3_2_1_19_1","volume-title":"3D Object Representations for Fine-Grained Categorization","author":"Krause Jonathan","unstructured":"Jonathan Krause, Michael Stark, Jia Deng, and Li Fei-Fei. 2013. 3D Object Representations for Fine-Grained Categorization. In ICCV. IEEE Computer Society, 554--561."},{"key":"e_1_3_2_1_20_1","volume-title":"Source Hypothesis Transfer for Unsupervised Domain Adaptation. In ICML (Proceedings of Machine Learning Research","volume":"6039","author":"Liang Jian","year":"2020","unstructured":"Jian Liang, Dapeng Hu, and Jiashi Feng. 2020. Do We Really Need to Access the Source Data? Source Hypothesis Transfer for Unsupervised Domain Adaptation. In ICML (Proceedings of Machine Learning Research, Vol. 119). PMLR, 6028--6039."},{"key":"e_1_3_2_1_21_1","volume-title":"A Systematic Survey of Prompting Methods in Natural Language Processing. ACM Comput. Surv. 55, 9","author":"Liu Pengfei","year":"2023","unstructured":"Pengfei Liu, Weizhe Yuan, Jinlan Fu, Zhengbao Jiang, Hiroaki Hayashi, and Graham Neubig. 2023. Pre-train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing. ACM Comput. Surv. 55, 9 (2023), 195:1--195:35."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Jianjie Luo Yehao Li Yingwei Pan Ting Yao Hongyang Chao and Tao Mei. 2021. CoCo-BERT: Improving Video-Language Pre-training with Contrastive Cross-modal Matching and Denoising. In ACM. ACM 5600--5608.","DOI":"10.1145\/3474085.3475703"},{"key":"e_1_3_2_1_23_1","volume-title":"Fine-Grained Visual Classification of Aircraft. CoRR abs\/1306.5151","author":"Maji Subhransu","year":"2013","unstructured":"Subhransu Maji, Esa Rahtu, Juho Kannala, Matthew B. Blaschko, and Andrea Vedaldi. 2013. Fine-Grained Visual Classification of Aircraft. CoRR abs\/1306.5151 (2013). arXiv:1306.5151"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Pietro Morerio Riccardo Volpi Ruggero Ragonesi and Vittorio Murino. 2020. Generative Pseudo-label Refinement for Unsupervised Domain Adaptation. In WACV. 3119--3128.","DOI":"10.1109\/WACV45572.2020.9093579"},{"key":"e_1_3_2_1_25_1","volume-title":"Automated Flower Classification over a Large Number of Classes","author":"Nilsback Maria-Elena","unstructured":"Maria-Elena Nilsback and Andrew Zisserman. 2008. Automated Flower Classification over a Large Number of Classes. In ICVGIP. IEEE Computer Society, 722--729."},{"key":"e_1_3_2_1_26_1","unstructured":"Shuaicheng Niu Jiaxiang Wu Yifan Zhang Zhiquan Wen Yaofo Chen Peilin Zhao and Mingkui Tan. 2023. Towards Stable Test-Time Adaptation in Dynamic Wild World. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_27_1","volume-title":"IEEE Conference on Computer Vision and Pattern Recognition. IEEE Computer Society, 3498--3505","author":"Parkhi Omkar M.","unstructured":"Omkar M. Parkhi, Andrea Vedaldi, Andrew Zisserman, and C. V. Jawahar. 2012. Cats and dogs. In IEEE Conference on Computer Vision and Pattern Recognition. IEEE Computer Society, 3498--3505."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3191696"},{"key":"e_1_3_2_1_29_1","volume-title":"Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arXiv:2103.00020 [cs.CV]"},{"key":"e_1_3_2_1_30_1","unstructured":"Alec Radford Jeff Wu Rewon Child D. Luan Dario Amodei and Ilya Sutskever. 2019. Language models are unsupervised multitask learners."},{"key":"e_1_3_2_1_31_1","volume-title":"ICML (Proceedings of Machine Learning Research","volume":"5400","author":"Recht Benjamin","year":"2019","unstructured":"Benjamin Recht, Rebecca Roelofs, Ludwig Schmidt, and Vaishaal Shankar. 2019. Do ImageNet Classifiers Generalize to ImageNet?. In ICML (Proceedings of Machine Learning Research, Vol. 97). PMLR, 5389--5400."},{"key":"e_1_3_2_1_32_1","volume-title":"Hao Tan, Mohit Bansal, Anna Rohrbach, Kai- Wei Chang, Zhewei Yao, and Kurt Keutzer.","author":"Shen Sheng","year":"2021","unstructured":"Sheng Shen, Liunian Harold Li, Hao Tan, Mohit Bansal, Anna Rohrbach, Kai- Wei Chang, Zhewei Yao, and Kurt Keutzer. 2021. How Much Can CLIP Benefit Vision-and-Language Tasks? CoRR abs\/2107.06383 (2021). arXiv:2107.06383 https:\/\/arxiv.org\/abs\/2107.06383"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2024.3389705"},{"key":"e_1_3_2_1_34_1","unstructured":"Manli Shu Weili Nie De-An Huang Zhiding Yu Tom Goldstein Anima Anandkumar and Chaowei Xiao. 2022. Test-Time Prompt Tuning for Zero-Shot Generalization in Vision-Language Models. arXiv:2209.07511 [cs.CV]"},{"key":"e_1_3_2_1_35_1","volume-title":"Uncertainty Reduction for Model Adaptation in Semantic Segmentation","author":"Sivaprasad Prabhu Teja","unstructured":"Prabhu Teja Sivaprasad and Fran\u00e7ois Fleuret. 2021. Uncertainty Reduction for Model Adaptation in Semantic Segmentation. In CVPR. Computer Vision Foundation \/ IEEE, 9613--9623."},{"key":"e_1_3_2_1_36_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention Is All You Need. arXiv:1706.03762 [cs.CL]"},{"key":"e_1_3_2_1_37_1","volume-title":"Tent: Fully Test-Time Adaptation by Entropy Minimization. In ICLR.","author":"Wang Dequan","year":"2021","unstructured":"Dequan Wang, Evan Shelhamer, Shaoteng Liu, Bruno A. Olshausen, and Trevor Darrell. 2021. Tent: Fully Test-Time Adaptation by Entropy Minimization. In ICLR."},{"key":"e_1_3_2_1_38_1","volume-title":"Xing","author":"Wang Haohan","year":"2019","unstructured":"Haohan Wang, Songwei Ge, Zachary C. Lipton, and Eric P. Xing. 2019. Learning Robust Global Representations by Penalizing Local Predictive Power. In NeurIPS. 10506--10518."},{"key":"e_1_3_2_1_39_1","unstructured":"Jun-KunWang and Andre Wibisono. 2023. Towards Understanding GD with Hard and Conjugate Pseudo-labels for Test-Time Adaptation. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_40_1","volume-title":"Luc Van Gool, and Dengxin Dai","author":"Fink Olga","year":"2022","unstructured":"QinWang, Olga Fink, Luc Van Gool, and Dengxin Dai. 2022. Continual Test-Time Domain Adaptation. In CVPR. IEEE, 7191--7201."},{"key":"e_1_3_2_1_41_1","volume-title":"AI 2023: Advances in Artificial Intelligence - 36th Australasian Joint Conference on Artificial Intelligence (Lecture Notes in Computer Science","author":"Wang Ran","unstructured":"Ran Wang, Hua Zuo, Zhen Fang, and Jie Lu. 2023. Multiple Teacher Model for Continual Test-Time Domain Adaptation. In AI 2023: Advances in Artificial Intelligence - 36th Australasian Joint Conference on Artificial Intelligence (Lecture Notes in Computer Science, Vol. 14471). Springer, 304--314. https:\/\/doi.org\/10.1007\/ 978--981--99--8388--9_25"},{"key":"e_1_3_2_1_42_1","unstructured":"Zhengbo Wang Jian Liang Ran He Nan Xu Zilei Wang and Tieniu Tan. 2023. Improving Zero-Shot Generalization for CLIP with Synthesized Prompts. arXiv:2307.07397 [cs.CV]"},{"key":"e_1_3_2_1_43_1","volume-title":"Hannaneh Hajishirzi, Ali Farhadi, Hongseok Namkoong, and Ludwig Schmidt.","author":"Wortsman Mitchell","year":"2021","unstructured":"Mitchell Wortsman, Gabriel Ilharco, Mike Li, Jong Wook Kim, Hannaneh Hajishirzi, Ali Farhadi, Hongseok Namkoong, and Ludwig Schmidt. 2021. Robust fine-tuning of zero-shot models. CoRR abs\/2109.01903 (2021). arXiv:2109.01903"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00653"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1609\/AAAI.V38I15.29590"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3369699"},{"key":"e_1_3_2_1_47_1","volume-title":"MEMO: Test Time Robustness via Adaptation and Augmentation. In NeurIPS.","author":"Zhang Marvin","year":"2022","unstructured":"Marvin Zhang, Sergey Levine, and Chelsea Finn. 2022. MEMO: Test Time Robustness via Adaptation and Augmentation. In NeurIPS."},{"key":"e_1_3_2_1_48_1","volume-title":"Generating Natural Adversarial Examples. CoRR abs\/1710.11342","author":"Zhao Zhengli","year":"2017","unstructured":"Zhengli Zhao, Dheeru Dua, and Sameer Singh. 2017. Generating Natural Adversarial Examples. CoRR abs\/1710.11342 (2017). arXiv:1710.11342"},{"key":"e_1_3_2_1_49_1","volume-title":"Chen Change Loy, and Ziwei Liu","author":"Zhou Kaiyang","year":"2022","unstructured":"Kaiyang Zhou, Jingkang Yang, Chen Change Loy, and Ziwei Liu. 2022. Conditional Prompt Learning for Vision-Language Models. In CVPR. IEEE, 16795--16804."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681213","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681213","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681213"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":50,"alternative-id":["10.1145\/3664647.3681213","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681213","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}