{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T05:41:43Z","timestamp":1775281303120,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":71,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,3,30]],"date-time":"2026-03-30T00:00:00Z","timestamp":1774828800000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF","award":["CCF-2047516 (CAREER) and CCF2146873"],"award-info":[{"award-number":["CCF-2047516 (CAREER) and CCF2146873"]}]},{"DOI":"10.13039\/501100006374","name":"NIH (National Institutes of Health)","doi-asserted-by":"publisher","award":["NIH-1R01HD108473-01"],"award-info":[{"award-number":["NIH-1R01HD108473-01"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"USDA","award":["P24-001771"],"award-info":[{"award-number":["P24-001771"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3676641.3716257","type":"proceedings-article","created":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T16:47:32Z","timestamp":1743094052000},"page":"451-466","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Generalizing Reuse Patterns for Efficient DNN on Microcontrollers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8311-020X","authenticated-orcid":false,"given":"Jiesong","family":"Liu","sequence":"first","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4116-5237","authenticated-orcid":false,"given":"Bin","family":"Ren","sequence":"additional","affiliation":[{"name":"William and Mary, Williamsburg, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3599-8010","authenticated-orcid":false,"given":"Xipeng","family":"Shen","sequence":"additional","affiliation":[{"name":"North Carolina State University, Raleigh, NC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"http:\/\/places.csail.mit.edu\/deepscene\/small-projects\/TRNpytorch-pose\/model_zoo\/models\/slim\/nets\/cifarnet.py","year":"2020","unstructured":"CifarNet. http:\/\/places.csail.mit.edu\/deepscene\/small-projects\/TRNpytorch-pose\/model_zoo\/models\/slim\/nets\/cifarnet.py, 2020."},{"key":"e_1_3_2_1_2_1","first-page":"517","article-title":"Neural network architectures for deploying tinyml applications on commodity microcontrollers","volume":"3","author":"Banbury Colby","year":"2021","unstructured":"Colby Banbury, Chuteng Zhou, Igor Fedorov, Ramon Matas, Urmish Thakker, Dibakar Gope, Vijay Janapa Reddi, Matthew Mattina, and Paul Whatmough. Micronets: Neural network architectures for deploying tinyml applications on commodity microcontrollers. Proceedings of Machine Learning and Systems, 3:517--532, 2021.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2018.8489437"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/I-SMAC47947.2019.9032589"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of machine learning and systems, 2:129--146","author":"Blalock Davis","year":"2020","unstructured":"Davis Blalock, Jose Javier Gonzalez Ortiz, Jonathan Frankle, and John Guttag. What is the state of neural network pruning? Proceedings of machine learning and systems, 2:129--146, 2020."},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Learning Representations","author":"Chen Beidi","year":"2021","unstructured":"Beidi Chen, Zichang Liu, Binghui Peng, Zhaozhuo Xu, Jonathan Lingjie Li, Tri Dao, Zhao Song, Anshumali Shrivastava, and Christopher Re. Mongoose: A learnable lsh framework for efficient neural network training. In International Conference on Learning Representations, 2021."},{"key":"e_1_3_2_1_7_1","volume-title":"February","author":"Chen Jou-An","year":"2023","unstructured":"Jou-An Chen, Wei Niu, Bin Ren, Yanzhi Wang, and Xipeng Shen. Survey: Exploiting data redundancy for optimization of deep learning. ACM Comput. Surv., 55(10), February 2023."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MWSCAS47672.2021.9531720"},{"issue":"1","key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3588684","article-title":"Efficient parallel graph analytics with rule-based compression","volume":"1","author":"Chen Zheng","year":"2023","unstructured":"Zheng Chen, Feng Zhang, Jiawei Guan, Jidong Zhai, Xipeng Shen, Huanchen Zhang, Wentong Shu, and Xiaoyong Du. Compressgraph: Efficient parallel graph analytics with rule-based compression. Proceedings of the ACM on Management of Data, 1(1):1--31, 2023.","journal-title":"Proceedings of the ACM on Management of Data"},{"key":"e_1_3_2_1_10_1","volume-title":"Energy efficient boosting of gemm accelerators for dnn via reuse. ACM Transactions on Design Automation of Electronic Systems (TODAES), 27(5):1--26","author":"Cicek Nihat Mert","year":"2022","unstructured":"Nihat Mert Cicek, Xipeng Shen, and Ozcan Ozturk. Energy efficient boosting of gemm accelerators for dnn via reuse. ACM Transactions on Design Automation of Electronic Systems (TODAES), 27(5):1--26, 2022."},{"key":"e_1_3_2_1_11_1","volume-title":"Cortex\u00ae-m4 technical reference manual","author":"Company Arm","year":"2010","unstructured":"Arm Company. Cortex\u00ae-m4 technical reference manual, 2010."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED.2019.8824955"},{"key":"e_1_3_2_1_13_1","first-page":"32","article-title":"Sparse architecture search for cnns on resource-constrained microcontrollers","author":"Fedorov Igor","year":"2019","unstructured":"Igor Fedorov, Ryan P Adams, Matthew Mattina, and Paul Whatmough. Sparse: Sparse architecture search for cnns on resource-constrained microcontrollers. Advances in Neural Information Processing Systems, 32, 2019.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_14_1","first-page":"18456","article-title":"Unified dnas for compressible tinyml models for neural processing units","volume":"35","author":"Fedorov Igor","year":"2022","unstructured":"Igor Fedorov, Ramon Matas, Hokchhay Tann, Chuteng Zhou, Matthew Mattina, and Paul Whatmough. Udc: Unified dnas for compressible tinyml models for neural processing units. Advances in Neural Information Processing Systems, 35:18456--18471, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.330"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM51629.2021.00125"},{"key":"e_1_3_2_1_17_1","first-page":"2022","article-title":"Transient redundancy elimination-based convolution","volume":"35","author":"Guan Jiawei","year":"2022","unstructured":"Jiawei Guan, Feng Zhang, Jiesong Liu, Hsin-Hsuan Sung, Ruofan Wu, Xiaoyong Du, and Xipeng Shen. Trec: Transient redundancy elimination-based convolution. In Neural Information Processing Systems 35 (Neurips 2022), 2022.","journal-title":"Neural Information Processing Systems"},{"key":"e_1_3_2_1_18_1","first-page":"1331","volume-title":"International Conference on Machine Learning","author":"Gupta Chirag","year":"2017","unstructured":"Chirag Gupta, Arun Sai Suggala, Ankit Goyal, Harsha Vardhan Simhadri, Bhargavi Paranjape, Ashish Kumar, Saurabh Goyal, Raghavendra Udupa, Manik Varma, and Prateek Jain. Protonn: Compressed and accurate knn for resource-scarce devices. In International Conference on Machine Learning, pages 1331--1340. PMLR, 2017."},{"key":"e_1_3_2_1_19_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J Dally. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149, 2015."},{"key":"e_1_3_2_1_20_1","volume-title":"Prajit Ramachandran, Mohammad Babaeizadeh, Honghui Shi, Jianan Li, Shuicheng Yan, and Thomas S Huang.","author":"Han Wei","year":"2016","unstructured":"Wei Han, Pooya Khorrami, Tom Le Paine, Prajit Ramachandran, Mohammad Babaeizadeh, Honghui Shi, Jianan Li, Shuicheng Yan, and Thomas S Huang. Seq-NMS for Video Object Detection. arXiv preprint arXiv:1602.08465, 2016."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527419"},{"key":"e_1_3_2_1_22_1","volume-title":"Distilling the knowledge in a neural network","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. Distilling the knowledge in a neural network (2015). arXiv preprint arXiv:1503.02531, 2, 2015."},{"key":"e_1_3_2_1_23_1","volume-title":"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and 0.5 mb model size. arXiv preprint arXiv:1602.07360","author":"Iandola Forrest N","year":"2016","unstructured":"Forrest N Iandola, Song Han, Matthew W Moskewicz, Khalid Ashraf, William J Dally, and Kurt Keutzer. Squeezenet: Alexnet-level accuracy with 50x fewer parameters and 0.5 mb model size. arXiv preprint arXiv:1602.07360, 2016."},{"key":"e_1_3_2_1_24_1","volume-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and 0.5 MB model size. arXiv preprint arXiv:1602.07360","author":"Iandola Forrest N","year":"2016","unstructured":"Forrest N Iandola, Song Han, Matthew W Moskewicz, Khalid Ashraf, William J Dally, and Kurt Keutzer. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and 0.5 MB model size. arXiv preprint arXiv:1602.07360, 2016."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2941491"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2736553"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614299"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.15388\/20-INFOR398"},{"key":"e_1_3_2_1_29_1","volume-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky Alex","year":"2009","unstructured":"Alex Krizhevsky and Geoffrey Hinton. Learning multiple layers of features from tiny images. 2009."},{"key":"e_1_3_2_1_30_1","first-page":"1935","volume-title":"International Conference on Machine Learning","author":"Kumar Ashish","year":"2017","unstructured":"Ashish Kumar, Saurabh Goyal, and Manik Varma. Resource-efficient machine learning in 2 kb ram for the internet of things. In International Conference on Machine Learning, pages 1935--1944. PMLR, 2017."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358252"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2020.2985963"},{"key":"e_1_3_2_1_33_1","volume-title":"Cmsis-nn: Efficient neural network kernels for arm cortex-m cpus. arXiv preprint arXiv:1801.06601","author":"Lai Liangzhen","year":"2018","unstructured":"Liangzhen Lai, Naveen Suda, and Vikas Chandra. Cmsis-nn: Efficient neural network kernels for arm cortex-m cpus. arXiv preprint arXiv:1801.06601, 2018."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460319.3464816"},{"key":"e_1_3_2_1_35_1","volume-title":"Mcunetv2: Memory-efficient patch-based inference for tiny deep learning. arXiv preprint arXiv:2110.15352","author":"Lin Ji","year":"2021","unstructured":"Ji Lin, Wei-Ming Chen, Yujun Lin, Han Cai, Chuang Gan, and Song Han. Mcunetv2: Memory-efficient patch-based inference for tiny deep learning. arXiv preprint arXiv:2110.15352, 2021."},{"key":"e_1_3_2_1_36_1","first-page":"11711","article-title":"Tiny deep learning on iot devices","volume":"33","author":"Lin Ji","year":"2020","unstructured":"Ji Lin,Wei-Ming Chen, Yujun Lin, Chuang Gan, and Song Han. Mcunet: Tiny deep learning on iot devices. Advances in Neural Information Processing Systems, 33:11711--11722, 2020.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582062"},{"key":"e_1_3_2_1_38_1","article-title":"Enabling efficient deep learning on mcu with transient redundancy elimination","author":"Liu Jiesong","year":"2024","unstructured":"Jiesong Liu, Feng Zhang, Jiawei Guan, Hsin-Hsuan Sung, Xiaoguang Guo, Saiqin Long, Xiaoyong Du, and Xipeng Shen. Enabling efficient deep learning on mcu with transient redundancy elimination. IEEE Transactions on Computers, 2024.","journal-title":"IEEE Transactions on Computers"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053395"},{"key":"e_1_3_2_1_40_1","volume-title":"NIPSWorkshop on Deep Learning and Unsupervised Feature Learning","author":"Netzer Yuval","year":"2011","unstructured":"Yuval Netzer, Tao Wang, Adam Coates, Alessandro Bissacco, Bo Wu, and Andrew Y. Ng. Reading digits in natural images with unsupervised feature learning. In NIPSWorkshop on Deep Learning and Unsupervised Feature Learning, 2011."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2019.00138"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330384"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330384"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICITSI50517.2020.9264979"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3132493"},{"issue":"12","key":"e_1_3_2_1_46_1","first-page":"2950","article-title":"Exploring data analytics without decompression on embedded gpu systems","volume":"32","author":"Pan Zaifeng","year":"2021","unstructured":"Zaifeng Pan, Feng Zhang, Yanliang Zhou, Jidong Zhai, Xipeng Shen, Onur Mutlu, and Xiaoyong Du. Exploring data analytics without decompression on embedded gpu systems. IEEE Transactions on Parallel and Distributed Systems, 32(12):2950--2964, 2021.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2018.8451355"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00016"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2008.2002919"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCWorkshops49005.2020.9145068"},{"key":"e_1_3_2_1_51_1","first-page":"342","volume-title":"International Conference on Transforming IDEAS (Inter-Disciplinary Exchanges, Analysis, and Search) into Viable Solutions","author":"Sharma Prerna","year":"2019","unstructured":"Prerna Sharma and Deepali Kamthania. Intelligent object detection and avoidance system. In International Conference on Transforming IDEAS (Inter-Disciplinary Exchanges, Analysis, and Search) into Viable Solutions, pages 342--351, 2019."},{"key":"e_1_3_2_1_52_1","volume-title":"Tinyml for ubiquitous edge ai. arXiv preprint arXiv:2102.01255","author":"Soro Stanislava","year":"2021","unstructured":"Stanislava Soro. Tinyml for ubiquitous edge ai. arXiv preprint arXiv:2102.01255, 2021."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASPDAC.2011.5722252"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/GCCE.2017.8229283"},{"key":"e_1_3_2_1_55_1","first-page":"1807","volume-title":"Proceedings of the ACMWeb Conference 2022","author":"Zhang Feng","year":"2022","unstructured":"RuofanWu, Feng Zhang, Jiawei Guan, Zhen Zheng, Xiaoyong Du, and Xipeng Shen. Drew: Efficient winograd cnn inference with deep reuse. In Proceedings of the ACMWeb Conference 2022, pages 1807--1816, 2022."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_30"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41928-018-0059-3"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02117-4"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/1328491.1328509"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2014.2357035"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE51399.2021.00148"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2016.2586074"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.14778\/3236187.3236203"},{"key":"e_1_3_2_1_65_1","first-page":"347","volume-title":"Proceedings of the 32nd ACM International Conference on Supercomputing (ICS)","author":"Zhang Feng","year":"2018","unstructured":"Feng Zhang, Jidong Zhai, Xipeng Shen, Onur Mutlu, and Wenguang Chen. Zwift: A programming framework for high performance text analytics on compressed data. In Proceedings of the 32nd ACM International Conference on Supercomputing (ICS), pages 347--358. ACM, 2018."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE48307.2020.00097"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3093234"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-020-00636-3"},{"key":"e_1_3_2_1_69_1","volume-title":"Hello edge: Keyword spotting on microcontrollers. arXiv preprint arXiv:1711.07128","author":"Zhang Yundong","year":"2017","unstructured":"Yundong Zhang, Naveen Suda, Liangzhen Lai, and Vikas Chandra. Hello edge: Keyword spotting on microcontrollers. arXiv preprint arXiv:1711.07128, 2017."},{"key":"e_1_3_2_1_70_1","first-page":"408","volume-title":"Proceedings of the IEEE international conference on computer vision","author":"Zhu Xizhou","year":"2017","unstructured":"Xizhou Zhu, Yujie Wang, Jifeng Dai, Lu Yuan, and Yichen Wei. Flowguided feature aggregation for video object detection. In Proceedings of the IEEE international conference on computer vision, pages 408--417, 2017."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.441"}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","location":"Rotterdam Netherlands","acronym":"ASPLOS '25","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676641.3716257","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3676641.3716257","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676641.3716257","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676641.3716257","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T11:10:40Z","timestamp":1755774640000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676641.3716257"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":71,"alternative-id":["10.1145\/3676641.3716257","10.1145\/3676641"],"URL":"https:\/\/doi.org\/10.1145\/3676641.3716257","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}