{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T14:48:41Z","timestamp":1776955721713,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62325405"],"award-info":[{"award-number":["62325405"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1145\/3626202.3637562","type":"proceedings-article","created":{"date-parts":[[2024,4,2]],"date-time":"2024-04-02T18:04:51Z","timestamp":1712081091000},"page":"223-234","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":104,"title":["FlightLLM: Efficient Large Language Model Inference with a Complete Mapping Flow on FPGAs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1030-3748","authenticated-orcid":false,"given":"Shulin","family":"Zeng","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8280-9072","authenticated-orcid":false,"given":"Jun","family":"Liu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0849-3252","authenticated-orcid":false,"given":"Guohao","family":"Dai","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9739-2930","authenticated-orcid":false,"given":"Xinhao","family":"Yang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3508-1755","authenticated-orcid":false,"given":"Tianyu","family":"Fu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7095-7963","authenticated-orcid":false,"given":"Hongyi","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2349-7286","authenticated-orcid":false,"given":"Wenheng","family":"Ma","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7875-2064","authenticated-orcid":false,"given":"Hanbo","family":"Sun","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4755-6881","authenticated-orcid":false,"given":"Shiyao","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1273-2573","authenticated-orcid":false,"given":"Zixiao","family":"Huang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9193-8494","authenticated-orcid":false,"given":"Yadong","family":"Dai","sequence":"additional","affiliation":[{"name":"Infinigence-AI, Shanghai, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8903-8276","authenticated-orcid":false,"given":"Jintao","family":"Li","sequence":"additional","affiliation":[{"name":"Infinigence-AI, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5958-9599","authenticated-orcid":false,"given":"Zehao","family":"Wang","sequence":"additional","affiliation":[{"name":"Infinigence-AI, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3401-5237","authenticated-orcid":false,"given":"Ruoyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Infinigence-AI, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4357-2717","authenticated-orcid":false,"given":"Kairui","family":"Wen","sequence":"additional","affiliation":[{"name":"Infinigence-AI, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2209-8312","authenticated-orcid":false,"given":"Xuefei","family":"Ning","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6108-5157","authenticated-orcid":false,"given":"Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,4,2]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2017. Deep Learning with INT8 Optimization on Xilinx Devices. [Online]. https:\/\/docs.xilinx.com\/v\/u\/en-US\/wp486-deep-learning-int8."},{"key":"e_1_3_2_1_2_1","unstructured":"2022. nvprof. [Online]. https:\/\/docs.nvidia.com\/cuda\/profiler-users-guide\/index. html."},{"key":"e_1_3_2_1_3_1","unstructured":"2022. Xilinx Board Utility Tool. [Online]. https:\/\/xilinx.github.io\/XRT\/2021.1\/ html\/xbutil2.html."},{"key":"e_1_3_2_1_4_1","volume-title":"Longformer: The longdocument transformer. arXiv preprint arXiv:2004.05150","author":"Beltagy Iz","year":"2020","unstructured":"Iz Beltagy, Matthew E Peters, and Arman Cohan. 2020. Longformer: The longdocument transformer. arXiv preprint arXiv:2004.05150 (2020)."},{"key":"e_1_3_2_1_5_1","unstructured":"Rishi Bommasani Drew A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill et al. 2021. On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)."},{"key":"e_1_3_2_1_6_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_7_1","volume-title":"LLM-empowered Chatbots for Psychiatrist and Patient Simulation: Application and Evaluation. arXiv preprint arXiv:2305.13614","author":"Chen Siyuan","year":"2023","unstructured":"Siyuan Chen, Mengyue Wu, Kenny Q Zhu, Kunyao Lan, Zhiling Zhang, and Lyuchun Cui. 2023. LLM-empowered Chatbots for Psychiatrist and Patient Simulation: Application and Evaluation. arXiv preprint arXiv:2305.13614 (2023)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577500"},{"key":"e_1_3_2_1_9_1","volume-title":"Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509","author":"Child Rewon","year":"2019","unstructured":"Rewon Child, Scott Gray, Alec Radford, and Ilya Sutskever. 2019. Generating long sequences with sparse transformers. 
arXiv preprint arXiv:1904.10509 (2019)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3061394"},{"key":"e_1_3_2_1_11_1","unstructured":"Together Computer. 2023. RedPajama: An Open Source Recipe to Reproduce LLaMA training dataset. https:\/\/github.com\/togethercomputer\/RedPajama-Data"},{"key":"e_1_3_2_1_12_1","volume-title":"ChatLaw: Open-Source Legal Large Language Model with Integrated External Knowledge Bases. arXiv:2306.16092","author":"Cui Jiaxi","year":"2023","unstructured":"Jiaxi Cui, Zongjian Li, Yang Yan, Bohua Chen, and Li Yuan. 2023. ChatLaw: Open-Source Legal Large Language Model with Integrated External Knowledge Bases. arXiv:2306.16092 (2023)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.2976475"},{"key":"e_1_3_2_1_14_1","volume-title":"int8 (): 8-bit matrix multiplication for transformers at scale. arXiv preprint arXiv:2208.07339","author":"Dettmers Tim","year":"2022","unstructured":"Tim Dettmers, Mike Lewis, Younes Belkada, and Luke Zettlemoyer. 2022. Llm. int8 (): 8-bit matrix multiplication for transformers at scale. arXiv preprint arXiv:2208.07339 (2022)."},{"key":"e_1_3_2_1_15_1","volume-title":"int8 (): 8-bit matrix multiplication for transformers at scale. arXiv preprint arXiv:2208.07339","author":"Dettmers Tim","year":"2022","unstructured":"Tim Dettmers, Mike Lewis, Younes Belkada, and Luke Zettlemoyer. 2022. Llm. int8 (): 8-bit matrix multiplication for transformers at scale. arXiv preprint arXiv:2208.07339 (2022)."},{"key":"e_1_3_2_1_16_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs.CL]","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs.CL]"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00050"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i11.26502"},{"key":"e_1_3_2_1_19_1","unstructured":"Elias Frantar and Dan Alistarh. 2023. SparseGPT: Massive Language Models Can Be Accurately Pruned in One-Shot. (2023)."},{"key":"e_1_3_2_1_20_1","volume-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2022. Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323 (2022)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490422.3502367"},{"key":"e_1_3_2_1_22_1","volume-title":"Angel-eye: A complete design flow for mapping CNN onto embedded FPGA","author":"Guo Kaiyuan","year":"2017","unstructured":"Kaiyuan Guo, Lingzhi Sui, Jiantao Qiu, Jincheng Yu, Junbin Wang, Song Yao, Song Han, Yu Wang, and Huazhong Yang. 2017. Angel-eye: A complete design flow for mapping CNN onto embedded FPGA. IEEE transactions on computer-aided design of integrated circuits and systems 37, 1 (2017), 35--47."},{"key":"e_1_3_2_1_23_1","volume-title":"Seonghak Kim, Young H. Oh, Yeonhong Park, Yoonho Song, Jung-Hun Park, Sanghee Lee, Kyoung Park, Jae W. Lee, and Deog-Kyoon Jeong.","author":"Ham Tae Jun","year":"2020","unstructured":"Tae Jun Ham, Sung Jun Jung, Seonghak Kim, Young H. 
Oh, Yeonhong Park, Yoonho Song, Jung-Hun Park, Sanghee Lee, Kyoung Park, Jae W. Lee, and Deog-Kyoon Jeong. 2020. A3: Accelerating Attention Mechanisms in Neural Networks with Approximation. arXiv:2002.10941 [cs.DC]"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895626"},{"key":"e_1_3_2_1_26_1","volume-title":"A Study on the Implementation of Generative AI Services Using an Enterprise Data-Based LLM Application Architecture. arXiv preprint arXiv:2309.01105","author":"Jeong Cheonsu","year":"2023","unstructured":"Cheonsu Jeong. 2023. A Study on the Implementation of Generative AI Services Using an Enterprise Data-Based LLM Application Architecture. arXiv preprint arXiv:2309.01105 (2023)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575747"},{"key":"e_1_3_2_1_28_1","volume-title":"SqueezeLLM: Dense-and-Sparse Quantization. arXiv preprint arXiv:2306.07629","author":"Kim Sehoon","year":"2023","unstructured":"Sehoon Kim, Coleman Hooper, Amir Gholami, Zhen Dong, Xiuyu Li, Sheng Shen, Michael W Mahoney, and Kurt Keutzer. 2023. SqueezeLLM: Dense-and-Sparse Quantization. arXiv preprint arXiv:2306.07629 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"Reformer: The efficient transformer. arXiv preprint arXiv:2001.04451","author":"Kitaev Nikita","year":"2020","unstructured":"Nikita Kitaev, Lukasz Kaiser, and Anselm Levskaya. 2020. Reformer: The efficient transformer. arXiv preprint arXiv:2001.04451 (2020)."},{"key":"e_1_3_2_1_30_1","first-page":"24101","article-title":"A fast post-training pruning framework for transformers","volume":"35","author":"Kwon Woosuk","year":"2022","unstructured":"Woosuk Kwon, Sehoon Kim, Michael W Mahoney, Joseph Hassoun, Kurt Keutzer, and Amir Gholami. 2022. A fast post-training pruning framework for transformers. Advances in Neural Information Processing Systems 35 (2022), 24101--24116.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"e_1_3_2_1_32_1","volume-title":"FTRANS: Energy-Efficient Acceleration of Transformers using FPGA. arXiv:2007.08563 [cs.DC]","author":"Li Bingbing","year":"2020","unstructured":"Bingbing Li, Santosh Pandey, Haowen Fang, Yanjun Lyv, Ji Li, Jieyang Chen, Mimi Xie, Lipeng Wan, Hang Liu, and Caiwen Ding. 2020. FTRANS: Energy-Efficient Acceleration of Transformers using FPGA. arXiv:2007.08563 [cs.DC]"},{"key":"e_1_3_2_1_33_1","volume-title":"AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. arXiv preprint arXiv:2306.00978","author":"Lin Ji","year":"2023","unstructured":"Ji Lin, Jiaming Tang, Haotian Tang, Shang Yang, Xingyu Dang, and Song Han. 2023. AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration. arXiv preprint arXiv:2306.00978 (2023)."},{"key":"e_1_3_2_1_34_1","volume-title":"Sanger: A Co-Design Framework for Enabling Sparse Attention Using Reconfigurable Architecture. In MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Lu Liqiang","year":"2021","unstructured":"Liqiang Lu, Yicheng Jin, Hangrui Bi, Zizhang Luo, Peng Li, Tao Wang, and Yun Liang. 2021. Sanger: A Co-Design Framework for Enabling Sparse Attention Using Reconfigurable Architecture. In MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture (Virtual Event, Greece) (MICRO '21). 
Association for Computing Machinery, New York, NY, USA, 977--991. https:\/\/doi.org\/10.1145\/3466752.3480125"},{"key":"e_1_3_2_1_35_1","volume-title":"Pointer sentinel mixture models. arXiv preprint arXiv:1609.07843","author":"Merity Stephen","year":"2016","unstructured":"Stephen Merity, Caiming Xiong, James Bradbury, and Richard Socher. 2016. Pointer sentinel mixture models. arXiv preprint arXiv:1609.07843 (2016)."},{"key":"e_1_3_2_1_36_1","volume-title":"Shi Qiu, Muhammad Saqib, Saeed Anwar, Muhammad Usman, Nick Barnes, and Ajmal S. Mian.","author":"Naveed Humza","year":"2023","unstructured":"Humza Naveed, Asad Ullah Khan, Shi Qiu, Muhammad Saqib, Saeed Anwar, Muhammad Usman, Nick Barnes, and Ajmal S. Mian. 2023. A Comprehensive Overview of Large Language Models. ArXiv abs\/2307.06435 (2023)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589057"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00068"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490422.3502364"},{"key":"e_1_3_2_1_40_1","volume-title":"Kabilan Elangovan, Laura Gutierrez, Ting Fang Tan, and Daniel Shu Wei Ting.","author":"Thirunavukarasu Arun James","year":"2023","unstructured":"Arun James Thirunavukarasu, Darren Shu Jeng Ting, Kabilan Elangovan, Laura Gutierrez, Ting Fang Tan, and Daniel Shu Wei Ting. 2023. Large language models in medicine. Nature medicine 29, 8 (2023), 1930--1940."},{"key":"e_1_3_2_1_41_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_42_1","volume-title":"Practitioners' Expectations on Code Completion. ArXiv abs\/2301.03846","author":"Wang Chaozheng","year":"2023","unstructured":"Chaozheng Wang, Junhao Hu, Cuiyun Gao, Yu Jin, Tao Xie, Hailiang Huang, Zhenyu Lei, and Yuetang Deng. 2023. Practitioners' Expectations on Code Completion. ArXiv abs\/2301.03846 (2023)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490422.3502360"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10070997"},{"key":"e_1_3_2_1_45_1","volume-title":"2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 97--110","author":"Zhang Zhekai","year":"2021","unstructured":"Hanrui Wang, Zhekai Zhang, and Song Han. 2021. Spatten: Efficient sparse attention architecture with cascade token and head pruning. In 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 97--110."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM48280.2020.00024"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247678"},{"key":"e_1_3_2_1_48_1","unstructured":"Jason Wei, Yi Tay, Rishi Bommasani, Colin Raffel, Barret Zoph, Sebastian Borgeaud, Dani Yogatama, Maarten Bosma, Denny Zhou, Donald Metzler, et al. 2022. Emergent abilities of large language models. arXiv preprint arXiv:2206.07682 (2022)."},{"key":"e_1_3_2_1_49_1","volume-title":"International Conference on Machine Learning. 
PMLR, 38087--38099","author":"Xiao Guangxuan","year":"2023","unstructured":"Guangxuan Xiao, Ji Lin, Mickael Seznec, Hao Wu, Julien Demouth, and Song Han. 2023. Smoothquant: Accurate and efficient post-training quantization for large language models. In International Conference on Machine Learning. PMLR, 38087--38099."},{"key":"e_1_3_2_1_50_1","unstructured":"Xilinx. 2021. Alveo U280 Data Center Accelerator Card Data Sheet. https:\/\/docs.xilinx.com\/v\/u\/en-US\/ds963-u280."},{"key":"e_1_3_2_1_51_1","unstructured":"Xilinx. 2023. Versal Architecture and Product Data Sheet. https:\/\/docs.xilinx.com\/v\/u\/en-US\/ds950-versal-overview."},{"key":"e_1_3_2_1_52_1","first-page":"27168","article-title":"Zeroquant: Efficient and affordable post-training quantization for large-scale transformers","volume":"35","author":"Yao Zhewei","year":"2022","unstructured":"Zhewei Yao, Reza Yazdani Aminabadi, Minjia Zhang, Xiaoxia Wu, Conglong Li, and Yuxiong He. 2022. Zeroquant: Efficient and affordable post-training quantization for large-scale transformers. Advances in Neural Information Processing Systems 35 (2022), 27168--27183.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_53_1","volume-title":"Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, et al.","author":"Zaheer Manzil","year":"2020","unstructured":"Manzil Zaheer, Guru Guruganesh, Kumar Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, et al. 2020. Big bird: Transformers for longer sequences. Advances in neural information processing systems 33 (2020), 17283--17297."},{"key":"e_1_3_2_1_54_1","volume-title":"Xi Victoria Lin, et al. [n. d.]. Opt: Open pre-trained transformer language models","author":"Zhang Susan","year":"2022","unstructured":"Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen, Christopher Dewan, Mona Diab, Xian Li, Xi Victoria Lin, et al. [n. d.]. Opt: Open pre-trained transformer language models, 2022. URL https:\/\/arxiv.org\/abs\/2205.01068 ([n. d.])."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3431920.3439296"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3291058"},{"key":"e_1_3_2_1_57_1","volume-title":"Learning n:m fine-grained structured sparse neural networks from scratch. arXiv preprint arXiv:2102.04010","author":"Zhou Aojun","year":"2021","unstructured":"Aojun Zhou, Yukun Ma, Junnan Zhu, Jianbo Liu, Zhijie Zhang, Kun Yuan, Wenxiu Sun, and Hongsheng Li. 2021. Learning n:m fine-grained structured sparse neural networks from scratch. 
arXiv preprint arXiv:2102.04010 (2021)."}],"event":{"name":"FPGA '24: The 2024 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays","location":"Monterey CA USA","acronym":"FPGA '24","sponsor":["SIGDA ACM Special Interest Group on Design Automation"]},"container-title":["Proceedings of the 2024 ACM\/SIGDA International Symposium on Field Programmable Gate Arrays"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626202.3637562","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626202.3637562","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T22:04:31Z","timestamp":1755900271000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626202.3637562"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4]]},"references-count":57,"alternative-id":["10.1145\/3626202.3637562","10.1145\/3626202"],"URL":"https:\/\/doi.org\/10.1145\/3626202.3637562","relation":{},"subject":[],"published":{"date-parts":[[2024,4]]},"assertion":[{"value":"2024-04-02","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
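
The record above is the JSON payload returned by the public Crossref REST API for DOI 10.1145/3626202.3637562 (FlightLLM, FPGA '24). Below is a minimal sketch of how such a record can be retrieved and a few of its fields read back out, assuming Python 3 with the third-party requests package installed; the endpoint, the DOI, and the field names come from the record itself, while everything else (variable names, timeout) is illustrative.

# Minimal sketch (assumption: Python 3 with the `requests` package installed).
# Fetches the Crossref work record shown above and prints a few fields that
# appear in its "message" object.
import requests

DOI = "10.1145/3626202.3637562"  # FlightLLM, FPGA '24 (from the record above)

resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # the "message" object mirrors the record above

title = work["title"][0]  # "title" is a list in Crossref records
authors = ", ".join(
    f'{a.get("given", "")} {a.get("family", "")}'.strip()
    for a in work.get("author", [])
)
print(title)
print(authors)
print("Published:", work["published"]["date-parts"][0])
print("Cited by:", work.get("is-referenced-by-count", 0))
print("References:", work.get("references-count", 0))

Crossref also asks regular API users to identify themselves (for example via a mailto query parameter or User-Agent header) to be routed to its "polite" pool; that detail is omitted here to keep the sketch short.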