{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T14:55:45Z","timestamp":1761404145936,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":74,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746262.3761975","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T14:52:23Z","timestamp":1761403943000},"page":"11-19","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient and Accurate Post-Training Sparsification of Large Language Models with Proximal Operators"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5018-2859","authenticated-orcid":false,"given":"Pu","family":"Zhao","sequence":"first","affiliation":[{"name":"Northeastern University, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7023-1176","authenticated-orcid":false,"given":"Dani","family":"Gunawan","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4965-7321","authenticated-orcid":false,"given":"Xuan","family":"Shen","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3882-5484","authenticated-orcid":false,"given":"Zheng","family":"Zhan","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1143-9063","authenticated-orcid":false,"given":"Xuehang","family":"Guo","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Champaign, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3808-4599","authenticated-orcid":false,"given":"Jun","family":"Liu","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8120-4456","authenticated-orcid":false,"given":"Zhenglun","family":"Kong","sequence":"additional","affiliation":[{"name":"Harvard University, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3024-7990","authenticated-orcid":false,"given":"Yanzhi","family":"Wang","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9194-1233","authenticated-orcid":false,"given":"Gaowen","family":"Liu","sequence":"additional","affiliation":[{"name":"Cisco Research, San Francisco, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6210-8883","authenticated-orcid":false,"given":"Xue","family":"Lin","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,26]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2019. WinoGrande: An Adversarial Winograd Schema Challenge at Scale."},{"key":"e_1_3_2_1_2_1","unstructured":"Yongqi An Xu Zhao Tao Yu Ming Tang and Jinqiao Wang. 2023. Fluctuation-based Adaptive Structured Pruning for Large Language Models. arXiv:2312.11983 [cs.CL]"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3005348"},{"key":"e_1_3_2_1_4_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=vXxardq6db","author":"Ashkboos Saleh","year":"2024","unstructured":"Saleh Ashkboos, Maximilian L. Croci, Marcelo Gennari do Nascimento, Torsten Hoefler, and James Hensman. 2024. SliceGPT: Compress Large Language Models by Deleting Rows and Columns. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=vXxardq6db"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Michael Boratko Harshit Padigela Divyendra Mikkilineni Pritish Yuvraj Rajarshi Das Andrew McCallum Maria Chang Achille Fokoue-Nkoutche Pavan Kapanipathi Nicholas Mattei et al. 2018. A systematic classification of knowledge reasoning and context within the ARC dataset. arXiv preprint arXiv:1806.00358 (2018).","DOI":"10.18653\/v1\/W18-2607"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Stephen Boyd Neal Parikh Eric Chu Borja Peleato Jonathan Eckstein et al. 2011. Distributed optimization and statistical learning via the alternating direction method of multipliers. Foundations and Trends\u00ae in Machine learning 3 1 (2011) 1--122.","DOI":"10.1561\/2200000016"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607087"},{"key":"e_1_3_2_1_8_1","first-page":"4475","article-title":"Optimal brain compression:A framework for accurate post-training quantization and pruning","volume":"35","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar and Dan Alistarh. 2022. Optimal brain compression:A framework for accurate post-training quantization and pruning. Advances in Neural Information Processing Systems 35 (2022), 4475--4488.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Frantar Elias","year":"2023","unstructured":"Elias Frantar and Dan Alistarh. 2023. SparseGPT: Massive Language Models Can Be Accurately Pruned in One-Shot. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML'23). JMLR.org, Article 414, 15 pages."},{"key":"e_1_3_2_1_10_1","volume-title":"GPTQ: Accurate post-training compression for generative pretrained transformers. arXiv preprint arXiv:2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2022. GPTQ: Accurate post-training compression for generative pretrained transformers. arXiv preprint arXiv:2210.17323 (2022)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3657310"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3508352.3549379"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247713"},{"key":"e_1_3_2_1_14_1","volume-title":"Improving Efficiency by Shrinkage: The James--Stein and Ridge Regression Estimators","author":"Gruber M.","unstructured":"M. Gruber. 1998. Improving Efficiency by Shrinkage: The James--Stein and Ridge Regression Estimators. Taylor & Francis. https:\/\/books.google.com\/books?id=wmA_R3ZFrXYC"},{"key":"e_1_3_2_1_15_1","volume-title":"Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint #38;arXiv:2312.00752","author":"Gu Albert","year":"2023","unstructured":"Albert Gu and Tri Dao. 2023. Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint #38;arXiv:2312.00752 (2023)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3418297"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the 28th International Conference on Neural Information Processing Systems -","volume":"1","author":"Han Song","unstructured":"Song Han, Jeff Pool, John Tran, and William J. Dally. 2015. Learning both weights and connections for efficient neural networks. In Proceedings of the 28th International Conference on Neural Information Processing Systems - Volume 1 (Montreal, Canada) (NIPS'15). MIT Press, Cambridge, MA, USA, 1135--1143."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICNN.1993.298572"},{"key":"e_1_3_2_1_19_1","volume-title":"Accelerated sparse neural training: A provable and efficient method to find n:mtransposable masks. Advances in neural information processing systems 34","author":"Hubara Itay","year":"2021","unstructured":"Itay Hubara, Brian Chmiel, Moshe Island, Ron Banner, Joseph Naor, and Daniel Soudry. 2021. Accelerated sparse neural training: A provable and efficient method to find n:mtransposable masks. Advances in neural information processing systems 34 (2021), 21099--21111."},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Machine Learning. PMLR, 4466--4475","author":"Hubara Itay","year":"2021","unstructured":"Itay Hubara, Yury Nahshan, Yair Hanani, Ron Banner, and Daniel Soudry. 2021. Accurate post training quantization with small calibration sets. In International Conference on Machine Learning. PMLR, 4466--4475."},{"key":"e_1_3_2_1_21_1","volume-title":"Principal Component Analysis","author":"Jolliffe I.T.","unstructured":"I.T. Jolliffe. 2006. Principal Component Analysis. Springer New York. https:\/\/books.google.com\/books?id=6ZUMBwAAQBAJ"},{"key":"e_1_3_2_1_22_1","volume-title":"Touretzky (Ed.)","volume":"2","author":"LeCun Yann","year":"1989","unstructured":"Yann LeCun, John Denker, and Sara Solla. 1989. Optimal Brain Damage. In Advances in Neural Information Processing Systems, D. Touretzky (Ed.), Vol. 2. Morgan-Kaufmann. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/1989\/file\/6c9882bbac1c7093bd25041881277658-Paper.pdf"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","unstructured":"Yanyu Li Changdi Yang Pu Zhao et al. 2023. Towards real-time segmentation on the edge (AAAI'23\/IAAI'23\/EAAI'23). Article 163 9 pages. doi:10.1609\/aaai.v37i2.25232","DOI":"10.1609\/aaai.v37i2.25232"},{"key":"e_1_3_2_1_24_1","volume-title":"Pruning-as-search: Efficient neural architecture search via channel pruning and structural reparameterization. arXiv preprint arXiv:2206.01198","author":"Li Yanyu","year":"2022","unstructured":"Yanyu Li, Pu Zhao, Geng Yuan, Xue Lin, Yanzhi Wang, and Xin Chen. 2022. Pruning-as-search: Efficient neural architecture search via channel pruning and structural reparameterization. arXiv preprint arXiv:2206.01198 (2022)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","unstructured":"Zhengang Li Geng Yuan Wei Niu Pu Zhao Yanyu Li Yuxuan Cai Xuan Shen Zheng Zhan Zhenglun Kong Qing Jin Zhiyu Chen Sijia Liu Kaiyuan Yang Bin Ren Yanzhi Wang and Xue Lin. 2021. NPAS: A Compiler-aware Framework of Unified Network Pruning and Architecture Search for Beyond Real-Time Mobile Acceleration. In 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 14250--14261. doi:10.1109\/CVPR46437.2021.01403","DOI":"10.1109\/CVPR46437.2021.01403"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.07.045"},{"key":"e_1_3_2_1_27_1","unstructured":"Xinyin Ma Gongfan Fang and Xinchao Wang. 2023. LLM-Pruner: On the Structural Pruning of Large Language Models. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_28_1","volume-title":"Human Language Technology: Proceedings of a Workshop held at Plainsboro, New Jersey, March 8--11","author":"Marcus Mitch","year":"1994","unstructured":"Mitch Marcus, Grace Kim, Mary Ann Marcinkiewicz, Robert MacIntyre, Ann Bies, Mark Ferguson, Karen Katz, and Britta Schasberger. 1994. The Penn treebank: Annotating predicate argument structure. In Human Language Technology: Proceedings of a Workshop held at Plainsboro, New Jersey, March 8--11, 1994."},{"key":"e_1_3_2_1_29_1","volume-title":"Pointer sentinel mixture models. arXiv preprint arXiv:1609.07843","author":"Merity Stephen","year":"2016","unstructured":"Stephen Merity, Caiming Xiong, James Bradbury, and Richard Socher. 2016. Pointer sentinel mixture models. arXiv preprint arXiv:1609.07843 (2016)."},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Machine Learning. PMLR, 7197--7206","author":"Nagel Markus","year":"2020","unstructured":"Markus Nagel, Rana Ali Amjad, Mart Van Baalen, Christos Louizos, and Tijmen Blankevoort. 2020. Up or down? adaptive rounding for post-training quantization. In International Conference on Machine Learning. PMLR, 7197--7206."},{"key":"e_1_3_2_1_31_1","volume-title":"GPT-4 technical report. ArXiv 2303","author":"R","year":"2023","unstructured":"R OpenAI. 2023. GPT-4 technical report. ArXiv 2303 (2023)."},{"key":"e_1_3_2_1_32_1","volume-title":"Raffaella Bernardi, Sandro Pezzelle, Marco Baroni, Gemma Boleda, and Raquel Fern\u00e1ndez.","author":"Paperno Denis","year":"2016","unstructured":"Denis Paperno, Germ\u00e1n Kruszewski, Angeliki Lazaridou, Quan Ngoc Pham, Raffaella Bernardi, Sandro Pezzelle, Marco Baroni, Gemma Boleda, and Raquel Fern\u00e1ndez. 2016. The LAMBADA dataset: Word prediction requiring a broad discourse context. arXiv preprint arXiv:1606.06031 (2016)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Neal Parikh Stephen Boyd et al. 2014. Proximal algorithms. Foundations and trends\u00ae in Optimization 1 3 (2014) 127--239.","DOI":"10.1561\/2400000003"},{"key":"e_1_3_2_1_34_1","volume-title":"PyTorch: an imperative style, high-performance deep learning library","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: an imperative style, high-performance deep learning library. Curran Associates Inc., Red Hook, NY, USA."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_36_1","volume-title":"Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al.","author":"Scao Teven Le","year":"2022","unstructured":"Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili?, Daniel Hesslow, Roman Castagn\u00e9, Alexandra Sasha Luccioni, Fran\u00e7ois Yvon, Matthias Gall\u00e9, et al. 2022. Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:2211.05100 (2022)."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"38","author":"Shen Xuan","year":"2024","unstructured":"Xuan Shen, Peiyan Dong, Lei Lu, Zhenglun Kong, Zhengang Li, Ming Lin, Chao Wu, and Yanzhi Wang. 2024. Agile-Quant: Activation-Guided Quantization for Faster Inference of LLMs on the Edge. In Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 38. 18944--18951."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Xuan Shen Weize Ma Jing Liu et al. 2025. QuartDepth: Post-Training Quantization for Real-Time Depth Estimation on the Edge. In CVPR.","DOI":"10.1109\/CVPR52734.2025.01069"},{"key":"e_1_3_2_1_39_1","volume-title":"Lazydit: Lazy learning for the acceleration of diffusion transformers. In AAAI.","author":"Shen Xuan","year":"2025","unstructured":"Xuan Shen, Zhao Song, Yufa Zhou, et al. 2025. Lazydit: Lazy learning for the acceleration of diffusion transformers. In AAAI."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Xuan Shen Zhao Song Yufa Zhou et al. 2025. Numerical pruning for efficient autoregressive models. In AAAI.","DOI":"10.1609\/aaai.v39i19.34249"},{"key":"e_1_3_2_1_41_1","unstructured":"Xuan Shen Pu Zhao Yifan Gong Zhenglun Kong Zheng Zhan Yushu Wu Ming Lin Chao Wu Xue Lin and Yanzhi Wang. 2024. Search for Efficient Large Language Models. In NeurIPS."},{"key":"e_1_3_2_1_42_1","unstructured":"Xuan Shen Hangyu Zheng Yifan Gong et al. 2025. Sparse Learning for State Space Models on Mobile. In ICLR. https:\/\/openreview.net\/forum?id=t8KLjiFNwn"},{"key":"e_1_3_2_1_43_1","volume-title":"A Simple and Effective Pruning Approach for Large Language Models. arXiv preprint arXiv:2306.11695","author":"Sun Mingjie","year":"2023","unstructured":"Mingjie Sun, Zhuang Liu, Anna Bair, and J. Zico Kolter. 2023. A Simple and Effective Pruning Approach for Large Language Models. arXiv preprint arXiv:2306.11695 (2023)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218571"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3299874.3317996"},{"key":"e_1_3_2_1_46_1","volume-title":"15th International Conference on Scientific and Statistical Database Management","author":"Tata Sandeep","year":"2003","unstructured":"Sandeep Tata and Jignesh M Patel. 2003. PiQA: An algebra for querying protein data sets. In 15th International Conference on Scientific and Statistical Database Management, 2003. IEEE, 141--150."},{"key":"e_1_3_2_1_47_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6105"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"ThomasWolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz et al. 2019. Huggingface's transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019).","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Yushu Wu Yifan Gong Pu Zhao et al. 2022. Compiler-aware neural architecture search for on-mobile real-time super-resolution. In ECCV. Springer 92--111.","DOI":"10.1007\/978-3-031-19800-7_6"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.14778\/3626292.3626303"},{"key":"e_1_3_2_1_52_1","volume-title":"Structured Adversarial Attack: Towards General Implementation and Better Interpretability. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=BkgzniCqY7","author":"Xu Kaidi","year":"2019","unstructured":"Kaidi Xu, Sijia Liu, Pu Zhao, Pin-Yu Chen, Huan Zhang, Quanfu Fan, Deniz Erdogmus, Yanzhi Wang, and Xue Lin. 2019. Structured Adversarial Attack: Towards General Implementation and Better Interpretability. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=BkgzniCqY7"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Changdi Yang Pu Zhao Yanyu Li et al. 2023. Pruning parameterization with bi-level optimization for efficient semantic segmentation on the edge. In CVPR.","DOI":"10.1109\/CVPR52729.2023.01478"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"crossref","first-page":"110551","DOI":"10.1016\/j.automatica.2022.110551","article-title":"Proximal admm for nonconvex and nonsmooth optimization","volume":"146","author":"Yang Yu","year":"2022","unstructured":"Yu Yang, Qing-Shan Jia, Zhanbo Xu, Xiaohong Guan, and Costas J Spanos. 2022. Proximal admm for nonconvex and nonsmooth optimization. Automatica 146 (2022), 110551.","journal-title":"Automatica"},{"key":"e_1_3_2_1_55_1","first-page":"27168","article-title":"Zeroquant: Efficient and affordable post-training quantization for large-scale transformers","volume":"35","author":"Yao Zhewei","year":"2022","unstructured":"Zhewei Yao, Reza Yazdani Aminabadi, Minjia Zhang, Xiaoxia Wu, Conglong Li, and Yuxiong He. 2022. Zeroquant: Efficient and affordable post-training quantization for large-scale transformers. Advances in Neural Information Processing Systems 35 (2022), 27168--27183.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_56_1","volume-title":"Hellaswag: Can a machine really finish your sentence? arXiv preprint arXiv:1905.07830","author":"Zellers Rowan","year":"2019","unstructured":"Rowan Zellers, Ari Holtzman, Yonatan Bisk, Ali Farhadi, and Yejin Choi. 2019. Hellaswag: Can a machine really finish your sentence? arXiv preprint arXiv:1905.07830 (2019)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Zheng Zhan Yifan Gong Pu Zhao Geng Yuan et al. 2021. Achieving on-mobile real-time super-resolution with neural architecture and pruning search. In ICCV. 4821--4831.","DOI":"10.1109\/ICCV48922.2021.00478"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","unstructured":"Zheng Zhan Zhenglun Kong Yifan Gong et al. 2024. Exploring Token Pruning in Vision State Space Models. In NeurIPS. https:\/\/openreview.net\/forum?id=eWiGn0Fcdx","DOI":"10.18653\/v1\/2024.emnlp-main.100"},{"key":"e_1_3_2_1_59_1","unstructured":"Zheng Zhan Yushu Wu Yifan Gong et al. 2024. Fast and Memory-Efficient Video Diffusion Using Streamlined Inference. In NeurIPS. https:\/\/openreview.net\/forum?id=iNvXYQrkpi"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","unstructured":"Zheng Zhan YushuWu Zhenglun Kong et al. 2024. Rethinking Token Reduction for State Space Models. In EMNLP. ACL Miami Florida USA 1686--1697. https:\/\/aclanthology.org\/2024.emnlp-main.100","DOI":"10.18653\/v1\/2024.emnlp-main.100"},{"key":"e_1_3_2_1_61_1","volume-title":"Xi Victoria Lin, et al","author":"Zhang Susan","year":"2022","unstructured":"Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen, Christopher Dewan, Mona Diab, Xian Li, Xi Victoria Lin, et al. 2022. Opt: Open pre-trained transformer language models. arXiv preprint arXiv:2205.01068 (2022)."},{"key":"e_1_3_2_1_62_1","volume-title":"Plug-and-Play: An Efficient Post-training Pruning Method for Large Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Tr0lPx9woF","author":"Zhang Yingtao","year":"2024","unstructured":"Yingtao Zhang, Haoli Bai, Jialin Zhao, Haokun Lin, Lu Hou, and Carlo Vittorio Cannistraci. 2024. Plug-and-Play: An Efficient Post-training Pruning Method for Large Language Models. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=Tr0lPx9woF"},{"key":"e_1_3_2_1_63_1","unstructured":"Yihua Zhang Yuguang Yao Parikshit Ram et al. 2022. Advancing model pruning via bi-level optimization. NeurIPS (2022)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","unstructured":"Pu Zhao Sijia Liu Pin-Yu Chen Nghia Hoang Kaidi Xu Bhavya Kailkhura and Xue Lin. 2019. On the Design of Black-Box Adversarial Examples by Leveraging Gradient-Free Optimization and Operator Splitting Method. In 2019 IEEE\/CVF ICCV. 121--130. doi:10.1109\/ICCV.2019.00021","DOI":"10.1109\/ICCV.2019.00021"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","unstructured":"Pu Zhao Sijia Liu Yanzhi Wang and Xue Lin. [n.d.]. An ADMM-Based Universal Framework for Adversarial Attacks on Deep Neural Networks. In ACM Multimedia 2018 (Seoul Republic of Korea). 9 pages. doi:10.1145\/3240508.3240639","DOI":"10.1145\/3240508.3240639"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS52030.2021.00043"},{"key":"e_1_3_2_1_67_1","unstructured":"Pu Zhao Xuan Shen Zhenglun Kong Yixin Shen Sung-En Chang Timothy Rupprecht Lei Lu Enfu Nan Changdi Yang Yumei He et al. 2024. Fully Open Source Moxin-7B Technical Report. arXiv preprint arXiv:2412.06845 (2024)."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317825"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287624.3288750"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/GlobalSIP.2018.8646651"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586163"},{"key":"e_1_3_2_1_73_1","volume-title":"Learning n:mfine-grained structured sparse neural networks from scratch. arXiv preprint arXiv:2102.04010","author":"Zhou Aojun","year":"2021","unstructured":"Aojun Zhou, Yukun Ma, Junnan Zhu, Jianbo Liu, Zhijie Zhang, Kun Yuan,Wenxiu Sun, and Hongsheng Li. 2021. Learning n:mfine-grained structured sparse neural networks from scratch. arXiv preprint arXiv:2102.04010 (2021)."},{"key":"e_1_3_2_1_74_1","volume-title":"To prune, or not to prune: exploring the efficacy of pruning for model compression. arXiv preprint arXiv:1710.01878","author":"Zhu Michael","year":"2017","unstructured":"Michael Zhu and Suyog Gupta. 2017. To prune, or not to prune: exploring the efficacy of pruning for model compression. arXiv preprint arXiv:1710.01878 (2017)."}],"event":{"name":"MM '25:The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 3rd International Workshop on Rich Media With Generative AI"],"original-title":[],"deposited":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T14:52:40Z","timestamp":1761403960000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746262.3761975"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,26]]},"references-count":74,"alternative-id":["10.1145\/3746262.3761975","10.1145\/3746262"],"URL":"https:\/\/doi.org\/10.1145\/3746262.3761975","relation":{},"subject":[],"published":{"date-parts":[[2025,10,26]]},"assertion":[{"value":"2025-10-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}