{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T10:46:03Z","timestamp":1781779563936,"version":"3.54.5"},"reference-count":107,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,8,9]],"date-time":"2024-08-09T00:00:00Z","timestamp":1723161600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,9]],"date-time":"2024-08-09T00:00:00Z","timestamp":1723161600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Key Research and Development Program of China","award":["No.s 2020AAA0106000, 2020AAA0104304"],"award-info":[{"award-number":["No.s 2020AAA0106000, 2020AAA0104304"]}]},{"name":"Department of Electronic Engineering of SEIEE, SJTU","award":["NSFC62071292"],"award-info":[{"award-number":["NSFC62071292"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s11263-024-02196-3","type":"journal-article","created":{"date-parts":[[2024,8,9]],"date-time":"2024-08-09T19:02:34Z","timestamp":1723230154000},"page":"567-589","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":73,"title":["A Comprehensive Study on Robustness of Image Classification Models: Benchmarking and Rethinking"],"prefix":"10.1007","volume":"133","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1285-6792","authenticated-orcid":false,"given":"Chang","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yinpeng","family":"Dong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenzhao","family":"Xiang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiao","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8294-6315","authenticated-orcid":false,"given":"Hang","family":"Su","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jun","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuefeng","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuan","family":"He","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hui","family":"Xue","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shibao","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,8,9]]},"reference":[{"key":"2196_CR1","unstructured":"Aldahdooh, A., Hamidouche, W., & Deforges, O. (2021). Reveal of vision transformers robustness against adversarial attacks. arXiv preprint arXiv:2106.03734"},{"key":"2196_CR2","unstructured":"Athalye, A., Carlini, N., & Wagner, D. (2018). Obfuscated gradients give a false sense of security: Circumventing defenses to adversarial examples. In International conference on machine learning, pp. 274\u2013283"},{"key":"2196_CR3","unstructured":"Bai, Y., Mei, J., Yuille, A. L., & Xie, C. (2021). Are transformers more robust than CNNs? In Advances in Neural information processing systems, pp. 26831\u201326843."},{"key":"2196_CR4","unstructured":"Barbu, A., Mayo, D., Alverio, J., Luo, W., Wang, C., Gutfreund, D., Tenenbaum, J., & Katz, B. (2019). Objectnet: A large-scale bias-controlled dataset for pushing the limits of object recognition models. In Advances in neural information processing systems, pp. 9453\u20139463"},{"key":"2196_CR5","unstructured":"Beyer, L., H\u00e1naff, O. J., Kolesnikov, A., Zhai, X., & Oord, A. V. D. (2020). Are we done with imagenet? arXiv preprint arXiv:2006.07159"},{"key":"2196_CR6","doi-asserted-by":"crossref","unstructured":"Bhojanapalli, S., Chakrabarti, A., Glasner, D., Li, D., Unterthiner, T., & Veit, A. (2021). Understanding robustness of transformers for image classification. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10231\u201310241.","DOI":"10.1109\/ICCV48922.2021.01007"},{"key":"2196_CR7","doi-asserted-by":"crossref","unstructured":"Bolme, D. S., Draper, B. A., & Beveridge, J. R. (2009). Average of synthetic exact filters. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2105\u20132112.","DOI":"10.1109\/CVPR.2009.5206701"},{"issue":"7","key":"2196_CR8","doi-asserted-by":"publisher","first-page":"3289","DOI":"10.1109\/TVCG.2020.2969185","volume":"27","author":"K Cao","year":"2020","unstructured":"Cao, K., Liu, M., Su, H., et al. (2020). Analyzing the noise robustness of deep neural networks. IEEE Transactions on Visualization and Computer Graphics, 27(7), 3289\u20133304.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"2196_CR9","doi-asserted-by":"crossref","unstructured":"Carlini, N., & Wagner, D. (2017). Towards evaluating the robustness of neural networks. In IEEE symposium on security and privacy, pp. 39\u201357","DOI":"10.1109\/SP.2017.49"},{"key":"2196_CR10","unstructured":"Carlini, N., Athalye, A., Papernot, N., Brendel, W., Rauber, J., Tsipras, D., Goodfellow, I., Madry, A. & Kurakin, A. (2019). On evaluating adversarial robustness. arXiv preprint arXiv:1902.06705"},{"key":"2196_CR11","doi-asserted-by":"crossref","unstructured":"Chen, T., Liu, S., Chang, S., Cheng, Y., Amini, L., & Wang, Z. (2020). Adversarial robustness: From self-supervised pre-training to fine-tuning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 699\u2013708.","DOI":"10.1109\/CVPR42600.2020.00078"},{"key":"2196_CR12","doi-asserted-by":"crossref","unstructured":"Chen, X., Xie, S., & He, K. (2021). An empirical study of training self-supervised vision transformers. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 9640\u20139649.","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"2196_CR13","unstructured":"Cohen, J. M., Rosenfeld, E., & Kolter, J. Z. (2019). Certified adversarial robustness via randomized smoothing. In International conference on machine learning, pp. 1310\u20131320."},{"key":"2196_CR14","unstructured":"Croce, F., & Hein, M. (2020). Reliable evaluation of adversarial robustness with an ensemble of diverse parameter-free attacks. In International conference on machine learning, pp. 2206\u20132216."},{"key":"2196_CR15","unstructured":"Croce, F., Andriushchenko, M., Sehwag, V., Debenedetti, E., Flammarion, N., Chiang, M., Mittal, P. & Hein, M., (2021). Robustbench: A standardized adversarial robustness benchmark. In Neural information processing systems datasets and benchmarks track."},{"key":"2196_CR16","doi-asserted-by":"crossref","unstructured":"Cubuk, E.D., Zoph, B., Shlens, J. & Le, Q.V. (2020) Randaugment: Practical automated data augmentation with a reduced search space. In Advances in neural information processing systems, pp. 18613\u201318624","DOI":"10.1109\/CVPRW50498.2020.00359"},{"key":"2196_CR17","doi-asserted-by":"crossref","unstructured":"Debenedetti, E., Sehwag, V., & Mittal, P. (2022). A light recipe to train robust vision transformers. arXiv preprint arXiv:2209.07399","DOI":"10.1109\/SaTML54575.2023.00024"},{"key":"2196_CR18","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. J., Li, K., & Fei-Fei, L. (2009) Imagenet: A large-scale hierarchical image database. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2196_CR19","unstructured":"Dong, X., Luu, A.T., Lin, M., Yan, S. & Zhang, H. (2021). How should pre-trained language models be fine-tuned towards adversarial robustness? In Advances in neural information processing systems, pp. 4356\u20134369"},{"key":"2196_CR20","doi-asserted-by":"crossref","unstructured":"Dong, Y., Liao, F., Pang, T., Su, H., Zhu, J., Hu, X., & Li, J. (2018). Boosting adversarial attacks with momentum. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 9185\u20139193.","DOI":"10.1109\/CVPR.2018.00957"},{"key":"2196_CR21","doi-asserted-by":"crossref","unstructured":"Dong, Y., Pang, T., Su, H., & Zhu, J. (2019). Evading defenses to transferable adversarial examples by translation-invariant attacks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4312\u20134321.","DOI":"10.1109\/CVPR.2019.00444"},{"key":"2196_CR22","doi-asserted-by":"crossref","unstructured":"Dong, Y., Fu, Q.A., Yang, X., Pang, T., Su, H., Xiao, Z. & Zhu, J. (2020). Benchmarking adversarial robustness on image classification. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 321\u2013331.","DOI":"10.1109\/CVPR42600.2020.00040"},{"key":"2196_CR23","unstructured":"Dong, Y., Ruan, S., Su, H., Kang, C., Wei, X. & Zhu, J. (2022a). Viewfool: Evaluating the robustness of visual recognition to adversarial viewpoints. In Advances in neural information processing systems."},{"key":"2196_CR24","unstructured":"Dong, Y., Xu, K., Yang, X., Pang, T., Deng, Z., Su, H., & Zhu, J. (2022b). Exploring memorization in adversarial training. In International conference on learning representations"},{"key":"2196_CR25","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S. & Uszkoreit, J. (2021). An image is worth 16x16 words: Transformers for image recognition at scale. In International conference on learning representations"},{"key":"2196_CR26","unstructured":"El-Nouby, A., Touvron, H., Caron, M., Bojanowski, P., Douze, M., Joulin, A., Laptev, I., Neverova, N., Synnaeve, G., Verbeek, J. & Jegou, H. (2021). Xcit: Cross-covariance image transformers. In Advances in neural information processing systems, pp. 20014\u201320027."},{"key":"2196_CR27","unstructured":"Engstrom, L., Ilyas, A., Salman, H. et\u00a0al (2019). Robustness (python library). https:\/\/github.com\/MadryLab\/robustness"},{"key":"2196_CR28","unstructured":"Fang, A., Ilharco, G., Wortsman, M., Wan, Y., Shankar, V., Dave, A., & Schmidt, L. (2022). Data determines distributional robustness in contrastive language image pre-training (clip). In International conference on machine learning, PMLR, pp. 6216\u20136234."},{"key":"2196_CR29","doi-asserted-by":"crossref","unstructured":"Fang, Y., Wang, W., Xie, B., Sun, Q., Wu, L., Wang, X., Huang, T., Wang, X. & Cao, Y. (2023). Eva: Exploring the limits of masked visual representation learning at scale. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 19358\u201319369.","DOI":"10.1109\/CVPR52729.2023.01855"},{"key":"2196_CR30","unstructured":"Geirhos, R., Rubisch, P., Michaelis, C., Bethge, M., Wichmann, F. A., & Brendel, W. (2019). Imagenet-trained CNNs are biased towards texture; Increasing shape bias improves accuracy and robustness. In International conference on learning representations."},{"key":"2196_CR31","unstructured":"Goodfellow, I. J., Shlens, J., & Szegedy, C. (2015). Explaining and harnessing adversarial examples. In International conference on learning representations"},{"key":"2196_CR32","unstructured":"Gowal, S., Qin, C., Uesato, J., Mann, T., & Kohli, P. (2020). Uncovering the limits of adversarial training against norm-bounded adversarial examples. arXiv preprint arXiv:2010.03593"},{"key":"2196_CR33","unstructured":"Guo, C., Rana, M., Cisse, M., & Van Der Maaten, L. (2018). Countering adversarial images using input transformations. In International conference on learning representations."},{"key":"2196_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109308","volume":"137","author":"J Guo","year":"2023","unstructured":"Guo, J., Bao, W., Wang, J., Ma, Y., Gao, X., Xiao, G., Liu, A., Dong, J., Liu, X., & Wu, W. (2023). A comprehensive evaluation framework for deep model robustness. Pattern Recognition, 137, 109308.","journal-title":"Pattern Recognition"},{"key":"2196_CR35","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"2196_CR36","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask r-cnn. In Proceedings of the IEEE international conference on computer vision, pp. 2961\u20132969.","DOI":"10.1109\/ICCV.2017.322"},{"key":"2196_CR37","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., & Girshick, R. (2022). Masked autoencoders are scalable vision learners. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 16000\u201316009.","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"2196_CR38","unstructured":"Hendrycks, D., & Dietterich, T. (2019). Benchmarking neural network robustness to common corruptions and perturbations. In International conference on learning representations."},{"key":"2196_CR39","unstructured":"Hendrycks, D., Lee, K., Mazeika, M. (2019). Using pre-training can improve model robustness and uncertainty. In International conference on machine learning, pp. 2712\u20132721."},{"key":"2196_CR40","unstructured":"Hendrycks, D., Mu, N., Cubuk, E.D., Zoph, B., Gilmer, J., & Lakshminarayanan, B. (2020). Augmix: A simple data processing method to improve robustness and uncertainty. In International conference on learning representations"},{"key":"2196_CR41","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Basart, S., Mu, N., Kadavath, S., Wang, F., Dorundo, E., Desai, R., Zhu, T., Parajuli, S., Guo, M., & Song, D. (2021a). The many faces of robustness: A critical analysis of out-of-distribution generalization. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 8340\u20138349.","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"2196_CR42","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Zhao, K., Basart, S., Steinhardt, J. & Song, D. (2021b). Natural adversarial examples. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 15262\u201315271.","DOI":"10.1109\/CVPR46437.2021.01501"},{"key":"2196_CR43","doi-asserted-by":"crossref","unstructured":"Howell, K. B. (2016). Principles of Fourier analysis. CRC Press.","DOI":"10.1201\/9781315181493"},{"key":"2196_CR44","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der Maaten, L., & Weinberger, K. Q. (2017). Densely connected convolutional networks. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4700\u20134708.","DOI":"10.1109\/CVPR.2017.243"},{"key":"2196_CR45","first-page":"5545","volume":"34","author":"H Huang","year":"2021","unstructured":"Huang, H., Wang, Y., Erfani, S., Gu, Q., Bailey, J., & Ma, X. (2021). Exploring architectural ingredients of adversarially robust deep neural networks. Advances in Neural Information Processing Systems, 34, 5545\u20135559.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2196_CR46","unstructured":"Ilharco, G., Wortsman, M., Wightman, R., Gordon, C., Carlini, N., Taori, R., Dave, A., Shankar, V., Namkoong, H., Miller, J., & Hajishirzi, H. (2021). Openclip. https:\/\/doi.org\/10.5281\/zenodo.5143773."},{"key":"2196_CR47","unstructured":"Ilyas, A., Santurkar, S., Tsipras, D., Engstrom, L., Tran, B., & Madry, A. (2019). Adversarial examples are not bugs, they are features. In Advances in neural information processing systems, pp. 125\u2013136"},{"key":"2196_CR48","unstructured":"Izmailov, P., Podoprikhin, D., Garipov, T., Vetrov, D. & Wilson, A. G. (2018). Averaging weights leads to wider optima and better generalization. In 34th Conference on uncertainty in artificial intelligence, pp. 876\u2013885."},{"key":"2196_CR49","unstructured":"Krizhevsky, A., & Hinton, G. (2009). Learning multiple layers of features from tiny images. Master\u2019s thesis, Department of Computer Science, University of Toronto."},{"key":"2196_CR50","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems, pp. 1097\u20131105."},{"key":"2196_CR51","unstructured":"Kurakin,A., Goodfellow, I., & Bengio, S. (2016). Adversarial examples in the physical world. arXiv preprint arXiv:1607.02533"},{"key":"2196_CR52","unstructured":"Kurakin, A., Goodfellow, I., & Bengio, S. (2017). Adversarial machine learning at scale. In International conference on learning representations."},{"key":"2196_CR53","doi-asserted-by":"crossref","unstructured":"Liao, F., Liang, M., Dong, Y., Pang, T., Hu, X. & Zhu, J. (2018). Defense against adversarial attacks using high-level representation guided denoiser. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1778\u20131787.","DOI":"10.1109\/CVPR.2018.00191"},{"key":"2196_CR54","unstructured":"Lin, J., Song, C., He, K., Wang, L., & Hopcroft, J. E. (2020). Nesterov accelerated gradient and scale invariance for adversarial attacks. In International conference on learning representations"},{"key":"2196_CR55","doi-asserted-by":"crossref","unstructured":"Ling, X., Ji, S., Zou, J., Wang, J., Wu, C., Li, B. & Wang, T. (2019). Deepsec: A uniform platform for security analysis of deep learning model. In IEEE Symposium on Security and Privacy, pp. 673\u2013690.","DOI":"10.1109\/SP.2019.00023"},{"key":"2196_CR56","doi-asserted-by":"crossref","unstructured":"Liu, A., Liu, X., Fan, J., Ma, Y., Zhang, A., Xie, H. & Tao, D. (2019). Perceptual-sensitive gan for generating adversarial patches. In Proceedings of the AAAI conference on artificial intelligence, pp. 1028\u20131035.","DOI":"10.1609\/aaai.v33i01.33011028"},{"key":"2196_CR57","doi-asserted-by":"crossref","unstructured":"Liu, A., Liu, X., Yu, H., Zhang, C., Liu, Q., & Tao, D. (2021a). Training robust deep neural networks via adversarial noise propagation. In IEEE transactions on image processing","DOI":"10.1109\/TIP.2021.3082317"},{"key":"2196_CR58","unstructured":"Liu, A., Guo, J., Wang, J., Liang, S., Tao, R., Zhou, W., Liu, C., Liu, X., & Tao, D. (2023a). X-adv: Physical adversarial object attacks against x-ray prohibited item detection. arXiv preprint arXiv:2302.09491 1"},{"key":"2196_CR59","doi-asserted-by":"crossref","unstructured":"Liu, A., Tang, S., Liang, S., Gong, R., Wu, B., Liu, X., & Tao, D. (2023b). Exploring the relationship between architectural design and adversarially robust generalization. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 4096\u20134107.","DOI":"10.1109\/CVPR52729.2023.00399"},{"key":"2196_CR60","unstructured":"Liu Y, Chen X, Liu C, & Song, D. (2017). Delving into transferable adversarial examples and black-box attacks. In International conference on learning representations."},{"key":"2196_CR61","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., & Guo, B. (2021b). Swin transformer: Hierarchical vision transformer using shifted windows. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2196_CR62","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C. Y., Feichtenhofer, C., Darrell, T. & Xie, S. (2022). A convnet for the 2020s. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 11976\u201311986","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"2196_CR63","unstructured":"Madry, A., Makelov, A., Schmidt, L., Tsipras, D., & Vladu, A. (2018). Towards deep learning models resistant to adversarial attacks. In International conference on learning representations."},{"key":"2196_CR64","doi-asserted-by":"crossref","unstructured":"Mahmood, K., Mahmood, R., & Van\u00a0Dijk, M. (2021). On the robustness of vision transformers to adversarial examples. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 7838\u20137847.","DOI":"10.1109\/ICCV48922.2021.00774"},{"key":"2196_CR65","unstructured":"Maiya, S. R., Ehrlich, M., Agarwal, V., Lim, S. N., Goldstein, T., & Shrivastava, A. (2021). A frequency perspective of adversarial robustness. arXiv preprint arXiv:2111.00861"},{"key":"2196_CR66","unstructured":"Mania, H., & Sra, S. (2020). Why do classifier accuracies show linear trends under distribution shift? arXiv preprint arXiv:2012.15483"},{"key":"2196_CR67","unstructured":"Miller, J.P., Taori, R., Raghunathan, A., Sagawa, S., Koh, P.W., Shankar, V., Liang, P., Carmon, Y., & Schmidt, L. (2021). Accuracy on the line: On the strong correlation between out-of-distribution and in-distribution generalization. In International conference on machine learning, PMLR, pp. 7721\u20137735."},{"key":"2196_CR68","unstructured":"Nicolae, M. I., Sinn, M., Tran, M. N., Buesser, B., Rawat, A., Wistuba, M., Zantedeschi, V., Baracaldo, N., Chen, B., Ludwig, H. & Molloy, I. M. (2018). Adversarial robustness toolbox v0. 4.0. arXiv preprint arXiv:1807.01069"},{"key":"2196_CR69","unstructured":"Pang, T., Yang, X., Dong, Y., Su, H. & Zhu, J. (2021). Bag of tricks for adversarial training. In International conference on learning representations"},{"key":"2196_CR70","unstructured":"Papernot, N., Faghri, F., Carlini, N., Goodfellow, I., Feinman, R., Kurakin, A., Xie, C., Sharma, Y., Brown, T., Roy, A., & Matyasko, A. (2016). Technical report on the cleverhans v2. 1.0 adversarial examples library. arXiv preprint arXiv:1610.00768"},{"key":"2196_CR71","doi-asserted-by":"crossref","unstructured":"Paul, S., & Chen, P. Y. (2022). Vision transformers are robust learners. In Proceedings of the AAAI conference on artificial intelligence, pp. 2071\u20132081.","DOI":"10.1609\/aaai.v36i2.20103"},{"key":"2196_CR72","unstructured":"Radford, A., Kim, J. W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., & Krueger, G. (2021). Learning transferable visual models from natural language supervision. In International conference on machine learning, pp. 8748\u20138763."},{"key":"2196_CR73","unstructured":"Rauber, J., Brendel, W., Bethge, M. (2017). Foolbox v0. 8.0: A python toolbox to benchmark the robustness of machine learning models. arXiv preprint arXiv:1707.04131"},{"key":"2196_CR74","unstructured":"Rebuffi, S. A., Gowal, S., Calian, D. A., Stimberg, F., Wiles, O., & Mann, T. A. (2021). Data augmentation can improve robustness. In Advances in neural information processing systems, pp. 29935\u201329948."},{"key":"2196_CR75","unstructured":"Recht, B., Roelofs, R., Schmidt, L., & Shankar, V. (2019). Do imagenet classifiers generalize to imagenet? In International conference on machine learning, pp. 5389\u20135400."},{"key":"2196_CR76","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R. & Farhadi, A. (2016). You only look once: Unified, real-time object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 779\u2013788.","DOI":"10.1109\/CVPR.2016.91"},{"key":"2196_CR77","unstructured":"Ren, S., He, K., Girshick, R. & Sun, J. (2015). Faster r-cnn: Towards real-time object detection with region proposal networks. In Advances in neural information processing systems, pp. 91\u201399."},{"key":"2196_CR78","unstructured":"Rice, L., Wong, E., Kolter, Z. (2020). Overfitting in adversarially robust deep learning. In International conference on machine learning, pp. 8093\u20138104."},{"key":"2196_CR79","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P. & Ommer, B. (2022). High-resolution image synthesis with latent diffusion models. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"issue":"3","key":"2196_CR80","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Khosla, A., Bernstein, M., & Berg, A. C. (2015). Imagenet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211\u2013252.","journal-title":"International Journal of Computer Vision"},{"key":"2196_CR81","unstructured":"Salman, H., Ilyas, A., Engstrom, L., Kapoor, A., & Madry, A. (2020). Do adversarially robust imagenet models transfer better? In Advances in Neural Information Processing Systems, pp. 3533\u20133545."},{"key":"2196_CR82","unstructured":"Schuhmann, C., Vencu, R., Beaumont, R., Kaczmarczyk, R., Mullis, C., Katta, A., Coombes, T., Jitsev, J., & Komatsuzaki, A. (2021). Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114"},{"key":"2196_CR83","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D. & Batra, D. (2017). Grad-cam: Visual explanations from deep networks via gradient-based localization. In Proceedings of the IEEE international conference on computer vision, pp. 618\u2013626.","DOI":"10.1109\/ICCV.2017.74"},{"key":"2196_CR84","unstructured":"Shao, R., Shi, Z., Yi, J., Chen, P. Y. & Hsieh, C. J. (2021). On the adversarial robustness of vision transformers. arXiv preprint arXiv:2103.15670"},{"key":"2196_CR85","doi-asserted-by":"crossref","unstructured":"Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. In International conference on learning representations.","DOI":"10.1109\/ICCV.2015.314"},{"key":"2196_CR86","unstructured":"Stutz, D., Hein, M., & Schiele, B. (2020) Confidence-calibrated adversarial training: Generalizing to unseen attacks. In International conference on machine learning, pp. 9155\u20139166."},{"key":"2196_CR87","unstructured":"Szegedy, C., Zaremba, W., Sutskever, I., Bruna, J., Erhan, D., Goodfellow, I. & Fergus, R. (2014). Intriguing properties of neural networks. In International conference on learning representations."},{"key":"2196_CR88","unstructured":"Tang, S., Gong, R., Wang, Y., Liu, A., Wang, J., Chen, X., Yu, F., Liu, X., Song, D., Yuille, A. & Torr, P. H. (2021). Robustart: Benchmarking robustness on architecture design and training techniques. arXiv preprint arXiv:2109.05211"},{"key":"2196_CR89","unstructured":"Tashiro, Y., Song, Y. & Ermon, S. (2020). Output diversified initialization for adversarial attacks. arXiv preprint arXiv:2003.06878 3"},{"key":"2196_CR90","unstructured":"Tram\u00e8r, F., Kurakin, A., Papernot, N., Goodfellow, I., Boneh, D., & McDaniel, P. (2018). Ensemble adversarial training: Attacks and defenses. In International conference on learning representations."},{"key":"2196_CR91","unstructured":"Tramer, F., Carlini, N., Brendel, W. & Madry, A. (2020). On adaptive attacks to adversarial example defenses. In Advances in neural information processing systems, pp. 1633\u20131645."},{"key":"2196_CR92","unstructured":"Tsipras, D., Santurkar, S., Engstrom, L., Turner, A., & Madry, A. (2019). Robustness may be at odds with accuracy. In International conference on learning representations."},{"key":"2196_CR93","unstructured":"Wang, H., Ge, S., Lipton, Z., & Xing, E. P. (2019). Learning robust global representations by penalizing local predictive power. In Advances in neural information processing systems, pp. 10506\u201310518."},{"key":"2196_CR94","doi-asserted-by":"crossref","unstructured":"Wang, X., & He, K. (2021). Enhancing the transferability of adversarial attacks through variance tuning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 1924\u20131933.","DOI":"10.1109\/CVPR46437.2021.00196"},{"key":"2196_CR95","unstructured":"Wang, Z., Bai, Y., Zhou, Y., & Xie, C. (2022). Can CNNs be more robust than transformers? arXiv preprint arXiv:2206.03452"},{"key":"2196_CR96","unstructured":"Wong, E, & Kolter, Z. (2018). Provable defenses against adversarial examples via the convex outer adversarial polytope. In International conference on machine learning, pp. 5286\u20135295."},{"key":"2196_CR97","unstructured":"Xie, C., Wang, J., Zhang, Z., Ren, Z., & Yuille, A. (2018). Mitigating adversarial effects through randomization. In International conference on learning representations."},{"key":"2196_CR98","doi-asserted-by":"crossref","unstructured":"Xie, C., Wu, Y., Maaten, L. V. D., Yuille, A. L., & He, K. (2019a). Feature denoising for improving adversarial robustness. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 501\u2013509.","DOI":"10.1109\/CVPR.2019.00059"},{"key":"2196_CR99","doi-asserted-by":"crossref","unstructured":"Xie, C., Zhang, Z., Zhou, Y., Bai, S., Wang, J., Ren, Z., & Yuille, A. L. (2019b). Improving transferability of adversarial examples with input diversity. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2730\u20132739.","DOI":"10.1109\/CVPR.2019.00284"},{"key":"2196_CR100","doi-asserted-by":"crossref","unstructured":"Xie, C., Tan, M., Gong, B., Wang, J., Yuille, A. L. & Le, Q. V. (2020). Adversarial examples improve image recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 819\u2013828.","DOI":"10.1109\/CVPR42600.2020.00090"},{"key":"2196_CR101","doi-asserted-by":"crossref","unstructured":"Xie, Z., Zhang, Z., Cao, Y., Lin, Y., Bao, J., Yao, Z., Dai, Q., & Hu, H., (2022). Simmim: A simple framework for masked image modeling. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 9653\u20139663.","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"2196_CR102","unstructured":"Yin, D., Gontijo Lopes, R., Shlens, J., Cubuk, E. D. & Gilmer, J. (2019). A fourier perspective on model robustness in computer vision. Advances in Neural Information Processing Systems, 32."},{"key":"2196_CR103","doi-asserted-by":"crossref","unstructured":"Yuan, L., Chen, Y., Wang, T., Yu, W., Shi, Y., Jiang, Z.H., Tay, F. E., Feng, J., & Yan, S. (2021). Tokens-to-token vit: Training vision transformers from scratch on imagenet. In Proceedings of the IEEE\/CVF international conference on computer vision, pp. 558\u2013567.","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"2196_CR104","unstructured":"Zhang, H., Cisse, M., Dauphin, Y.N., & Lopez-Paz, D. (2018). mixup: Beyond empirical risk minimization. In International conference on learning representations"},{"key":"2196_CR105","unstructured":"Zhang, H., Yu, Y., Jiao, J., Xing, E., El Ghaoui, L., & Jordan, M. (2019a). Theoretically principled trade-off between robustness and accuracy. In International conference on machine learning, pp. 7472\u20137482."},{"key":"2196_CR106","unstructured":"Zhang, T., & Zhu, Z. (2019). Interpreting adversarially trained convolutional neural networks. In International conference on machine learning, pp. 7502\u20137511."},{"key":"2196_CR107","unstructured":"Zhang, Z., Jung, C., & Liang, X. (2019b). Adversarial defense by suppressing high-frequency components. arXiv preprint arXiv:1908.06566"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02196-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02196-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02196-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,22]],"date-time":"2025-01-22T06:40:39Z","timestamp":1737528039000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02196-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,9]]},"references-count":107,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["2196"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02196-3","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,9]]},"assertion":[{"value":"4 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}