{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:44:47Z","timestamp":1772725487159,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,23]],"date-time":"2024-06-23T00:00:00Z","timestamp":1719100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["61975124"],"award-info":[{"award-number":["61975124"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,23]]},"DOI":"10.1145\/3649329.3655896","type":"proceedings-article","created":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T19:27:22Z","timestamp":1731007642000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["INSPIRE: Accelerating Deep Neural Networks via Hardware-friendly Index-Pair Encoding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8769-293X","authenticated-orcid":false,"given":"Fangxin","family":"Liu","sequence":"first","affiliation":[{"name":"the Department of Computer Science and Engineering, Shanghai Jiaotong University, Shanghai, Shanghai, China"},{"name":"Shanghai Qizhi Institute (SQI), Shanghai, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6964-8910","authenticated-orcid":false,"given":"Ning","family":"Yang","sequence":"additional","affiliation":[{"name":"the Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, Shanghai, China"},{"name":"Shanghai Qizhi Institute (SQI), Shanghai, Shanghai, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8425-3124","authenticated-orcid":false,"given":"Zhiyan","family":"Song","sequence":"additional","affiliation":[{"name":"Shanghai Qi Zhi Institute, Shanghai, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2157-4927","authenticated-orcid":false,"given":"Zongwu","family":"Wang","sequence":"additional","affiliation":[{"name":"the Department of Computer Science and Engineering, Shanghai Jiaotong University, Shanghai, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2939-6534","authenticated-orcid":false,"given":"Haomin","family":"Li","sequence":"additional","affiliation":[{"name":"the Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8086-6802","authenticated-orcid":false,"given":"Shiyuan","family":"Huang","sequence":"additional","affiliation":[{"name":"the Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6494-4786","authenticated-orcid":false,"given":"Zhuoran","family":"Song","sequence":"additional","affiliation":[{"name":"the Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0810-1458","authenticated-orcid":false,"given":"Songwen","family":"Pei","sequence":"additional","affiliation":[{"name":"the Computer Science and Engineering Department, University of Shanghai for Science and Technology, Shanghai, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7353-8798","authenticated-orcid":false,"given":"Li","family":"Jiang","sequence":"additional","affiliation":[{"name":"the Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, Shanghai, China"},{"name":"Shanghai Qizhi Institute (SQI), Shanghai, Shanghai, 
China"}]}],"member":"320","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Rajeev Balasubramonian et al. 2017. CACTI 7: New tools for interconnect exploration in innovative off-chip memories. TACO (2017).","DOI":"10.1145\/3085572"},{"key":"e_1_3_2_1_2_1","unstructured":"Tom Brown et al. 2020. Language models are few-shot learners. NIPS (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Yu-Hsin Chen et al. 2019. Eyeriss v2: A flexible accelerator for emerging deep neural networks on mobile devices. JETCAS (2019).","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_2_1_4_1","unstructured":"Synopsys Design Compiler. 2019. [Online]. Available: https:\/\/www.synopsys.com\/support\/training\/rtlsynthesis\/design-compiler-rtl-synthesis.html."},{"key":"e_1_3_2_1_5_1","volume-title":"Imagenet: A large-scale hierarchical image database. In CVPR.","author":"Jia Deng","year":"2009","unstructured":"Jia Deng et al. 2009. Imagenet: A large-scale hierarchical image database. In CVPR."},{"key":"e_1_3_2_1_6_1","volume-title":"Proc. IEEE","author":"Lei","year":"2020","unstructured":"Lei Deng et al. 2020. Model compression and hardware acceleration for neural networks: A comprehensive survey. Proc. IEEE (2020)."},{"key":"e_1_3_2_1_7_1","unstructured":"Alexey Dosovitskiy et al. 2020. An image is worth 16\u00d716 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_8_1","volume-title":"Ant: Exploiting adaptive numerical data type for low-bit deep neural network quantization. In MICRO.","author":"Cong Guo","year":"2022","unstructured":"Cong Guo et al. 2022. Ant: Exploiting adaptive numerical data type for low-bit deep neural network quantization. In MICRO."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Benoit Jacob et al. 2018. 
Quantization and training of neural networks for efficient integer-arithmetic-only inference. In CVPR.","DOI":"10.1109\/CVPR.2018.00286"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Shubham Jain et al. 2019. BiScaled-DNN: Quantizing long-tailed datastructures with two scale factors for deep neural networks. In DAC.","DOI":"10.1145\/3316781.3317783"},{"key":"e_1_3_2_1_11_1","unstructured":"Norman P Jouppi et al. 2017. In-datacenter performance analysis of a tensor processing unit. In ISCA."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Sangil Jung et al. 2019. Learning to quantize deep networks by optimizing quantization intervals with task loss. In CVPR.","DOI":"10.1109\/CVPR.2019.00448"},{"key":"e_1_3_2_1_13_1","unstructured":"Fangxin Liu et al. 2021. Improving neural network efficiency via post-training quantization with adaptive floating-point. In ICCV."},{"key":"e_1_3_2_1_14_1","volume-title":"SPARK: Scalable and Precision-Aware Acceleration of Neural Networks via Efficient Encoding. In HPCA.","author":"Fangxin Liu","year":"2024","unstructured":"Fangxin Liu et al. 2024. SPARK: Scalable and Precision-Aware Acceleration of Neural Networks via Efficient Encoding. In HPCA."},{"key":"e_1_3_2_1_15_1","unstructured":"Nvidia. 2020. Ampere Architecture Whitepaper. https:\/\/images.nvidia.cn\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Eunhyeok Park et al. 2018. Energy-efficient neural network accelerator based on outlier-aware low-precision computation. In ISCA.","DOI":"10.1109\/ISCA.2018.00063"},{"key":"e_1_3_2_1_17_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. NIPS","author":"Adam Paszke","year":"2019","unstructured":"Adam Paszke et al. 2019. Pytorch: An imperative style, high-performance deep learning library. 
NIPS (2019)."},{"key":"e_1_3_2_1_18_1","volume-title":"Algorithms for sparse linear systems","author":"Scott Jennifer","unstructured":"Jennifer Scott and Miroslav T\u016fma. 2023. Algorithms for sparse linear systems. Springer Nature."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Hardik Sharma et al. 2018. Bit fusion: Bit-level dynamically composable architecture for accelerating deep neural network. In ISCA.","DOI":"10.1109\/ISCA.2018.00069"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Zhuoran Song et al. 2020. Drq: dynamic region-based quantization for deep neural network acceleration. In ISCA.","DOI":"10.1109\/ISCA45697.2020.00086"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Thierry Tambe et al. 2020. Algorithm-hardware co-design of adaptive floating-point encodings for resilient deep learning inference. In DAC.","DOI":"10.1109\/DAC18072.2020.9218516"},{"key":"e_1_3_2_1_22_1","volume-title":"Estimating the number of clusters in a data set via the gap statistic. J R STAT SOC B","author":"Tibshirani Robert","year":"2001","unstructured":"Robert Tibshirani, Guenther Walther, and Trevor Hastie. 2001. Estimating the number of clusters in a data set via the gap statistic. J R STAT SOC B (2001)."},{"key":"e_1_3_2_1_23_1","volume-title":"GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv","author":"Alex Wang","year":"2018","unstructured":"Alex Wang et al. 2018. GLUE: A multi-task benchmark and analysis platform for natural language understanding. arXiv (2018)."},{"key":"e_1_3_2_1_24_1","volume-title":"Smoothquant: Accurate and efficient post-training quantization for large language models. In ICML.","author":"Guangxuan Xiao","year":"2023","unstructured":"Guangxuan Xiao et al. 2023. Smoothquant: Accurate and efficient post-training quantization for large language models. 
In ICML."},{"key":"e_1_3_2_1_25_1","volume-title":"Gobo: Quantizing attention-based nlp models for low latency and energy efficient inference. In MICRO.","author":"Zadeh Ali Hadi","year":"2020","unstructured":"Ali Hadi Zadeh et al. 2020. Gobo: Quantizing attention-based nlp models for low latency and energy efficient inference. In MICRO."}],"event":{"name":"DAC '24: 61st ACM\/IEEE Design Automation Conference","location":"San Francisco CA USA","acronym":"DAC '24","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE-CEDA","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 61st ACM\/IEEE Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3655896","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649329.3655896","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:48Z","timestamp":1750295868000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3655896"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,23]]},"references-count":25,"alternative-id":["10.1145\/3649329.3655896","10.1145\/3649329"],"URL":"https:\/\/doi.org\/10.1145\/3649329.3655896","relation":{},"subject":[],"published":{"date-parts":[[2024,6,23]]},"assertion":[{"value":"2024-11-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}