{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:10:24Z","timestamp":1755825024431,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100006374","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2024M760357"],"award-info":[{"award-number":["2024M760357"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"Natural Science Foundation of Sichuan Province","doi-asserted-by":"publisher","award":["2025ZNSFSC1464"],"award-info":[{"award-number":["2025ZNSFSC1464"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Sichuan Central-Guided Local Science and Technology Development Program","award":["2023ZYD0165"],"award-info":[{"award-number":["2023ZYD0165"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3733341","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:29:43Z","timestamp":1750876183000},"page":"1322-1330","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Fine-grained Block Pruning with Tiny Sets for Vision Transformers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1612-610X","authenticated-orcid":false,"given":"Yilin","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, University of Electronic Science and Technology of China, Chengdu, Sichuan, China and The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1986-8961","authenticated-orcid":false,"given":"Qiang","family":"Dong","sequence":"additional","affiliation":[{"name":"The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China and Ubiquitous Intelligence and Trusted Services Key Laboratory of Sichuan Province, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4839-0234","authenticated-orcid":false,"given":"Dongyang","family":"Zhang","sequence":"additional","affiliation":[{"name":"The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China and Ubiquitous Intelligence and Trusted Services Key Laboratory of Sichuan Province, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8259-2449","authenticated-orcid":false,"given":"Xin","family":"Hu","sequence":"additional","affiliation":[{"name":"The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8676-7429","authenticated-orcid":false,"given":"Tao","family":"He","sequence":"additional","affiliation":[{"name":"The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China and Ubiquitous Intelligence and Trusted Services Key Laboratory of Sichuan Province, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3712-2349","authenticated-orcid":false,"given":"Aiguo","family":"Chen","sequence":"additional","affiliation":[{"name":"The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China and Ubiquitous Intelligence and Trusted Services Key Laboratory of Sichuan Province, Chengdu, Sichuan, China"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"The Tenth International Conference on Learning Representations, ICLR 2022","author":"Alizadeh Milad","year":"2022","unstructured":"Milad Alizadeh, Shyam A. Tailor, Luisa M. Zintgraf, Joost van Amersfoort, Sebastian Farquhar, Nicholas Donald Lane, and Yarin Gal. 2022. Prospect Pruning: Finding Trainable Weights at Initialization using Meta-Gradients. In The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25--29, 2022. OpenReview.net."},{"key":"e_1_3_2_1_2_1","volume-title":"Adaptive Input Representations for Neural Language Modeling. In 7th International Conference on Learning Representations, ICLR 2019","author":"Baevski Alexei","year":"2019","unstructured":"Alexei Baevski and Michael Auli. 2019. Adaptive Input Representations for Neural Language Modeling. In 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6--9, 2019. OpenReview.net."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--58452--8_13"},{"key":"e_1_3_2_1_4_1","volume-title":"Chasing Sparsity in Vision Transformers: An End-to-End Exploration. In Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021","author":"Chen Tianlong","year":"2021","unstructured":"Tianlong Chen, Yu Cheng, Zhe Gan, Lu Yuan, Lei Zhang, and Zhangyang Wang. 2021. Chasing Sparsity in Vision Transformers: An End-to-End Exploration. In Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6--14, 2021, virtual, Marc'Aurelio Ranzato, Alina Beygelzimer, Yann N. Dauphin, Percy Liang, and Jennifer Wortman Vaughan (Eds.). 19974--19988. https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/a61f27ab2165df0e18cc9433bd7f27c5-Abstract.html"},{"key":"e_1_3_2_1_5_1","volume-title":"9th International Conference on Learning Representations, ICLR 2021","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3--7, 2021."},{"key":"e_1_3_2_1_6_1","volume-title":"The Unreasonable Ineffectiveness of the Deeper Layers. In NeurIPS 2024 Workshop on Scientific Methods for Understanding Deep Learning. https:\/\/openreview.net\/forum?id=jwhPErqvdS","author":"Gromov Andrey","year":"2024","unstructured":"Andrey Gromov, Kushal Tirumala, Hassan Shapourian, Paolo Glorioso, and Dan Roberts. 2024. The Unreasonable Ineffectiveness of the Deeper Layers. In NeurIPS 2024 Workshop on Scientific Methods for Understanding Deep Learning. https:\/\/openreview.net\/forum?id=jwhPErqvdS"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3355890"},{"key":"e_1_3_2_1_8_1","volume-title":"Masked Autoencoders Are Scalable Vision Learners. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022","author":"He Kaiming","year":"2022","unstructured":"Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Doll\u00e1r, and Ross B. Girshick. 2022. Masked Autoencoders Are Scalable Vision Learners. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18--24, 2022. IEEE, 15979--15988."},{"key":"e_1_3_2_1_9_1","volume-title":"What Matters in Transformers? Not All Attention is Needed. CoRR","author":"He Shwai","year":"2024","unstructured":"Shwai He, Guoheng Sun, Zheyu Shen, and Ang Li. 2024b. What Matters in Transformers? Not All Attention is Needed. CoRR, Vol. abs\/2406.15786 (2024)."},{"key":"e_1_3_2_1_10_1","unstructured":"Alex Krizhevsky. 2009. Learning multiple layers of features from tiny images. Technical Report TR-2009. University of Toronto Toronto ON Canada."},{"key":"e_1_3_2_1_11_1","volume-title":"The Tenth International Conference on Learning Representations, ICLR 2022","author":"Liang Youwei","year":"2022","unstructured":"Youwei Liang, Chongjian Ge, Zhan Tong, Yibing Song, Jue Wang, and Pengtao Xie. 2022. EViT: Expediting Vision Transformers via Token Reorganizations. In The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25--29, 2022. OpenReview.net."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_13_1","volume-title":"Neural Network Pruning With Residual-Connections and Limited-Data. In 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020","author":"Luo Jian-Hao","year":"2020","unstructured":"Jian-Hao Luo and Jianxin Wu. 2020. Neural Network Pruning With Residual-Connections and Limited-Data. In 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, June 13--19, 2020. Computer Vision Foundation \/ IEEE, 1455--1464."},{"key":"e_1_3_2_1_14_1","volume-title":"ShortGPT: Layers in Large Language Models are More Redundant Than You Expect. CoRR","author":"Men Xin","year":"2024","unstructured":"Xin Men, Mingyu Xu, Qingyu Zhang, Bingning Wang, Hongyu Lin, Yaojie Lu, Xianpei Han, and Weipeng Chen. 2024. ShortGPT: Layers in Large Language Models are More Redundant Than You Expect. CoRR, Vol. abs\/2403.03853 (2024)."},{"volume-title":"A visual vocabulary for flower classification. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06)","author":"Nilsback M-E","key":"e_1_3_2_1_15_1","unstructured":"M-E Nilsback and Andrew Zisserman. 2006. A visual vocabulary for flower classification. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06), Vol. 2. IEEE, 1447--1454."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01501"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24","volume":"10357","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18--24 July 2021, Virtual Event (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 10347--10357."},{"key":"e_1_3_2_1_19_1","volume-title":"\u0141 ukasz Kaiser, and Illia Polosukhin","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141 ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I. Guyon, U. Von Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett (Eds.), Vol. 30. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_1_20_1","volume-title":"Practical Network Acceleration with Tiny Sets. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023","author":"Wang Guo-Hua","year":"2023","unstructured":"Guo-Hua Wang and Jianxin Wu. 2023. Practical Network Acceleration with Tiny Sets. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17--24, 2023. IEEE, 20331--20340."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00078"},{"key":"e_1_3_2_1_22_1","volume-title":"DBP: Discrimination Based Block-Level Pruning for Deep Model Acceleration. CoRR","author":"Wang Wenxiao","year":"2019","unstructured":"Wenxiao Wang, Shuai Zhao, Minghao Chen, Jinming Hu, Deng Cai, and Haifeng Liu. 2019. DBP: Discrimination Based Block-Level Pruning for Deep Model Acceleration. CoRR, Vol. abs\/1912.10178 (2019). showeprint[arXiv]1912.10178 http:\/\/arxiv.org\/abs\/1912.10178"},{"key":"e_1_3_2_1_23_1","volume-title":"Joint Token Pruning and Squeezing Towards More Aggressive Compression of Vision Transformers. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023","author":"Wei Siyuan","year":"2023","unstructured":"Siyuan Wei, Tianzhu Ye, Shen Zhang, Yao Tang, and Jiajun Liang. 2023. Joint Token Pruning and Squeezing Towards More Aggressive Compression of Vision Transformers. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17--24, 2023. IEEE, 2092--2101."},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings, Part XXVII (Lecture Notes in Computer Science","volume":"427","author":"Wu Zhuguanyu","year":"2024","unstructured":"Zhuguanyu Wu, Jiaxin Chen, Hanwen Zhong, Di Huang, and Yunhong Wang. 2024. AdaLog: Post-training Quantization for Vision Transformers with Adaptive Logarithm Quantizer. In Computer Vision - ECCV 2024 - 18th European Conference, Milan, Italy, September 29-October 4, 2024, Proceedings, Part XXVII (Lecture Notes in Computer Science, Vol. 15085), Ales Leonardis, Elisa Ricci, Stefan Roth, Olga Russakovsky, Torsten Sattler, and G\u00fcl Varol (Eds.). Springer, 411--427."},{"key":"e_1_3_2_1_25_1","volume-title":"European Conference on Computer Vision. Springer, 269--287","author":"Xu Kaixin","year":"2024","unstructured":"Kaixin Xu, Zhe Wang, Chunyun Chen, Xue Geng, Jie Lin, Xulei Yang, Min Wu, Xiaoli Li, and Weisi Lin. 2024a. Lpvit: Low-power semi-structured pruning for vision transformers. In European Conference on Computer Vision. Springer, 269--287."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00016"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00145"},{"key":"e_1_3_2_1_28_1","volume-title":"GOHSP: A Unified Framework of Graph and Optimization-Based Heterogeneous Structured Pruning for Vision Transformer. In Thirty-Seventh AAAI Conference on Artificial Intelligence, AAAI","author":"Yin Miao","year":"2023","unstructured":"Miao Yin, Burak Uzkent, Yilin Shen, Hongxia Jin, and Bo Yuan. 2023. GOHSP: A Unified Framework of Graph and Optimization-Based Heterogeneous Structured Pruning for Vision Transformer. In Thirty-Seventh AAAI Conference on Artificial Intelligence, AAAI 2023, Thirty-Fifth Conference on Innovative Applications of Artificial Intelligence, IAAI 2023, Thirteenth Symposium on Educational Advances in Artificial Intelligence, EAAI 2023, Washington, DC, USA, February 7--14, 2023, Brian Williams, Yiling Chen, and Jennifer Neville (Eds.). AAAI Press, 10954--10962."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3646-6"},{"key":"e_1_3_2_1_30_1","volume-title":"MiniViT: Compressing Vision Transformers with Weight Multiplexing. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022","author":"Zhang Jinnian","year":"2022","unstructured":"Jinnian Zhang, Houwen Peng, Kan Wu, Mengchen Liu, Bin Xiao, Jianlong Fu, and Lu Yuan. 2022. MiniViT: Compressing Vision Transformers with Weight Multiplexing. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2022, New Orleans, LA, USA, June 18--24, 2022. IEEE, 12135--12144."},{"key":"e_1_3_2_1_31_1","volume-title":"Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022","author":"Zheng Chuanyang","year":"2022","unstructured":"Chuanyang Zheng, Zheyang Li, Kai Zhang, Zhi Yang, Wenming Tan, Jun Xiao, Ye Ren, and Shiliang Pu. 2022. SAViT: Structure-Aware Vision Transformer Pruning via Collaborative Optimization. In Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022, Sanmi Koyejo, S. Mohamed, A. Agarwal, Danielle Belgrave, K. Cho, and A. Oh (Eds.)."}],"event":{"name":"ICMR '25: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Chicago IL USA","acronym":"ICMR '25"},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3733341","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:13:00Z","timestamp":1755749580000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3733341"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":31,"alternative-id":["10.1145\/3731715.3733341","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3733341","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}