{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T15:30:10Z","timestamp":1773588610124,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","funder":[{"name":"National Key R&amp;D Program of China","award":["2023YFB4503500"],"award-info":[{"award-number":["2023YFB4503500"]}]},{"name":"Beijing Natural Science Foundation","award":["L234078"],"award-info":[{"award-number":["L234078"]}]},{"name":"National Natural Science Foundation of China","award":["62502498"],"award-info":[{"award-number":["62502498"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,22]]},"DOI":"10.1145\/3779212.3790132","type":"proceedings-article","created":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T13:55:26Z","timestamp":1773150926000},"page":"239-254","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["BitRed: Taming Non-Uniform Bit-Level Sparsity with a Programmable RISC-V ISA for DNN Acceleration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-8460-8173","authenticated-orcid":false,"given":"Yanhuan","family":"Liu","sequence":"first","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4069-2251","authenticated-orcid":false,"given":"Wenming","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6000-3869","authenticated-orcid":false,"given":"Kunming","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7774-8389","authenticated-orcid":false,"given":"Yuqun","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1501-5321","authenticated-orcid":false,"given":"Siao","family":"Wen","sequence":"additional","affiliation":[{"name":"Ricore IC Technologies Ltd., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4008-3081","authenticated-orcid":false,"given":"Lexin","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3905-6936","authenticated-orcid":false,"given":"Tianyu","family":"Liu","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2949-2297","authenticated-orcid":false,"given":"Haibin","family":"Wu","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5950-7370","authenticated-orcid":false,"given":"Zhihua","family":"Fan","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0065-6128","authenticated-orcid":false,"given":"Xiaochun","family":"Ye","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5219-0908","authenticated-orcid":false,"given":"Dongrui","family":"Fan","sequence":"additional","affiliation":[{"name":"Ricore IC Technologies Ltd., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0494-6332","authenticated-orcid":false,"given":"Xuejun","family":"An","sequence":"additional","affiliation":[{"name":"State Key Lab of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,3,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Bit-Pragmatic Deep Neural Network Computing. In 2017 50th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 382-394","author":"Albericio Jorge","year":"2017","unstructured":"Jorge Albericio, Alberto Delm\u00e1s, Patrick Judd, Sayeh Sharify, Gerard O'Leary, Roman Genov, and Andreas Moshovos. 2017. Bit-Pragmatic Deep Neural Network Computing. In 2017 50th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 382-394."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42615.2023.10067269"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375595"},{"key":"e_1_3_2_1_4_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877-1901."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2654822.2541967"},{"key":"e_1_3_2_1_6_1","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et al., 2018. TVM: An automated End-to-End optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 578-594."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_8_1","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171-4186."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"A. Gholami S. Kim Z. Dong Z. Yao M. W. Mahoney and K. Keutzer. 2021. A survey of quantization methods for efficient neural network inference. In Low-Power Computer Vision. Chapman and Hall\/CRC. 291\u2013326.","DOI":"10.1201\/9781003162810-13"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00095"},{"key":"e_1_3_2_1_11_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J Dally. 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358275"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00010"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783722"},{"key":"e_1_3_2_1_19_1","volume-title":"Nvidia jetson agx orin series. A Giant Leap Forward for Robotics and Edge AI Applications. Technical Brief","author":"Karumbunathan Leela S","year":"2022","unstructured":"Leela S Karumbunathan. 2022. Nvidia jetson agx orin series. A Giant Leap Forward for Robotics and Edge AI Applications. Technical Brief (2022)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_26"},{"key":"e_1_3_2_1_21_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems, Vol. 25 (2012)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.618"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2004.1281665"},{"key":"e_1_3_2_1_24_1","volume-title":"Deep learning. nature","author":"LeCun Yann","year":"2015","unstructured":"Yann LeCun, Yoshua Bengio, and Geoffrey Hinton. 2015. Deep learning. nature, Vol. 521, 7553 (2015), 436-444."},{"key":"e_1_3_2_1_25_1","volume-title":"Optimal brain damage. Advances in neural information processing systems","author":"LeCun Yann","year":"1989","unstructured":"Yann LeCun, John Denker, and Sara Solla. 1989. Optimal brain damage. Advances in neural information processing systems, Vol. 2 (1989)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2022.3146202"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413631"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480123"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00107"},{"key":"e_1_3_2_1_30_1","unstructured":"NVIDIA. 2020. NVIDIA A100 Tensor Core GPU Architecture Whitepaper. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf Accessed: 2025-08-18."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"e_1_3_2_1_32_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. https:\/\/api.semanticscholar.org\/CorpusID:160025533"},{"key":"e_1_3_2_1_33_1","unstructured":"Joseph Redmon and Ali Farhadi. 2018. YOLOv3: An Incremental Improvement. arXiv:1804.02767 [cs.CV] https:\/\/arxiv.org\/abs\/1804.02767"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_35_1","volume-title":"Laconic Deep Learning Inference Acceleration. In 2019 ACM\/IEEE 46th Annual International Symposium on Computer Architecture (ISCA). 304-317","author":"Sharify Sayeh","year":"2019","unstructured":"Sayeh Sharify, Alberto Delmas Lascorz, Mostafa Mahmoud, Milos Nikolic, Kevin Siu, Dylan Malone Stuart, Zissis Poulos, and Andreas Moshovos. 2019. Laconic Deep Learning Inference Acceleration. In 2019 ACM\/IEEE 46th Annual International Symposium on Computer Architecture (ISCA). 304-317."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00062"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2022.3226481"},{"key":"e_1_3_2_1_38_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2023.3324477"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2017.2761740"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00972"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"V. Titopoulos K. Alexandridis C. Peltekis C. Nicopoulos and G. Dimitrakopoulos. 2023. IndexMAC: A Custom RISC-V Vector Instruction to Accelerate Structured-Sparse Matrix Multiplications. arXiv:2311.07241 [cs.AR] https:\/\/arxiv.org\/abs\/2311.07241","DOI":"10.23919\/DATE58400.2024.10546747"},{"key":"e_1_3_2_1_43_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_45_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_46_1","volume-title":"User-level ISA, version 2.0. EECS Department","author":"Waterman Andrew","year":"2014","unstructured":"Andrew Waterman, Yunsup Lee, David A Patterson, and Krste Asanovic. 2014. The RISC-V instruction set manual, volume I: User-level ISA, version 2.0. EECS Department, University of California, Berkeley, Tech. Rep. UCB\/EECS-2014-54 (2014), 4."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00096"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2020.3013518"},{"key":"e_1_3_2_1_50_1","volume-title":"Densely Connected Pyramid Dehazing Network. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition., 3194\u20133203","author":"Zhang He","unstructured":"He Zhang and Vishal M. Patel. 2018. Densely Connected Pyramid Dehazing Network. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition., 3194\u20133203."},{"key":"e_1_3_2_1_51_1","volume-title":"Xi Victoria Lin, et al","author":"Zhang Susan","year":"2022","unstructured":"Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen, Christopher Dewan, Mona Diab, Xian Li, Xi Victoria Lin, et al., 2022. Opt: Open pre-trained transformer language models. arXiv preprint arXiv:2205.01068 (2022)."}],"event":{"name":"ASPLOS '26: 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Pittsburgh PA USA","sponsor":["SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"deposited":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T13:56:43Z","timestamp":1773583003000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3779212.3790132"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,22]]},"references-count":51,"alternative-id":["10.1145\/3779212.3790132","10.1145\/3779212"],"URL":"https:\/\/doi.org\/10.1145\/3779212.3790132","relation":{},"subject":[],"published":{"date-parts":[[2026,3,22]]},"assertion":[{"value":"2026-03-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}