{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T18:15:14Z","timestamp":1775067314229,"version":"3.50.1"},"reference-count":78,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Key-Area Research and Development Program of Guangdong Province","award":["2021B0101310002"],"award-info":[{"award-number":["2021B0101310002"]}]},{"DOI":"10.13039\/501100001809","name":"NSF of China","doi-asserted-by":"publisher","award":["61832020"],"award-info":[{"award-number":["61832020"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"NSF of China","doi-asserted-by":"publisher","award":["62032001"],"award-info":[{"award-number":["62032001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"NSF of China","doi-asserted-by":"publisher","award":["92064006"],"award-info":[{"award-number":["92064006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Academy of Artificial Intelligence"},{"DOI":"10.13039\/501100013314","name":"Higher Education Discipline Innovation Project","doi-asserted-by":"publisher","award":["B18001"],"award-info":[{"award-number":["B18001"]}],"id":[{"id":"10.13039\/501100013314","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput.-Aided Des. Integr. Circuits Syst."],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1109\/tcad.2022.3170848","type":"journal-article","created":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T19:45:30Z","timestamp":1651088730000},"page":"136-149","source":"Crossref","is-referenced-by-count":55,"title":["Energon: Toward Efficient Acceleration of Transformers Using Dynamic Sparse Attention"],"prefix":"10.1109","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7929-8054","authenticated-orcid":false,"given":"Zhe","family":"Zhou","sequence":"first","affiliation":[{"name":"Center for Energy-Efficient Computing and Applications, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5917-2251","authenticated-orcid":false,"given":"Junlin","family":"Liu","sequence":"additional","affiliation":[{"name":"Center for Energy-Efficient Computing and Applications, Peking University, Beijing, China"}]},{"given":"Zhenyu","family":"Gu","sequence":"additional","affiliation":[{"name":"DAMO Academy, Alibaba Inc., Beijing, China"}]},{"given":"Guangyu","family":"Sun","sequence":"additional","affiliation":[{"name":"Center for Energy-Efficient Computing and Applications, Peking University, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CCWC47524.2020.9031192"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2228360.2228584"},{"key":"ref3","article-title":"Longformer: The long-document transformer","author":"Beltagy","year":"2020","journal-title":"arXiv:2004.05150"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref5","volume-title":"DRAMPower: Open-Source DRAM Power & Energy Estimation Tool","author":"Chandrasekar","year":"2012"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.40"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"ref9","article-title":"Generating long sequences with sparse transformers","author":"Child","year":"2019","journal-title":"arXiv:1904.10509"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00027"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref12","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref13","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv:2010.11929"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2872887.2750389"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.30"},{"key":"ref17","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding","author":"Han","year":"2015","journal-title":"arXiv:1510.00149"},{"key":"ref18","article-title":"Gaussian error linear units (GELUs)","author":"Hendrycks","year":"2016","journal-title":"arXiv:1606.08415"},{"key":"ref19","article-title":"Axial attention in multidimensional transformers","author":"Ho","year":"2019","journal-title":"arXiv:1912.12180"},{"key":"ref20","volume-title":"Huggingface Transformers","year":"2021"},{"key":"ref21","volume-title":"Intel Xeon Gold 5220 Processor","year":"2021"},{"key":"ref22","volume-title":"ViT-Pytorch","year":"2021"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2015.2414456"},{"key":"ref25","article-title":"Reformer: The efficient transformer","volume-title":"8th Int. Conf. Learn. Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26\u201330, 2020","author":"Kitaev"},{"key":"ref26","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref27","first-page":"1106","article-title":"ImageNet classification with deep convolutional neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst. 26th Annu. Conf. Neural Inf. Process. Syst. (NIPS)","author":"Krizhevsky"},{"key":"ref28","article-title":"AlBERT: A lite BERT for self-supervised learning of language representations","author":"Lan","year":"2019","journal-title":"arXiv:1909.11942"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00066"},{"key":"ref31","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019","journal-title":"arXiv:1907.11692"},{"key":"ref32","first-page":"1","article-title":"Pointer sentinel mixture models","volume-title":"Proc. 5th Int. Conf. Learn. Represent. (ICLR)","author":"Merity"},{"key":"ref33","volume-title":"8Gb, 16Gb: 253-Ball, Dual-Channel Mobile LPDDR3 SDRAM","year":"2021"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2010-343"},{"key":"ref35","volume-title":"FreePDK45","year":"2021"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC49654.2021.9622829"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ASYNC48570.2021.00015"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/NORCHIP.2014.7004740"},{"key":"ref39","volume-title":"NVIDIA JETSON TX2"},{"key":"ref40","volume-title":"NVIDIA V100 Tensor Core GPU"},{"key":"ref41","volume-title":"CACTI","author":"Packard","year":"2021"},{"key":"ref42","first-page":"889","article-title":"A 4.3GB\/s mobile memory interface with power-efficient bandwidth scaling","volume-title":"Proc. Symp. Vlsi Circuits","author":"Palmer"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"ref44","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"Paszke","year":"2019","journal-title":"arXiv:1912.01703"},{"key":"ref45","volume-title":"pyRAPL","year":"2021"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.232"},{"issue":"8","key":"ref47","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"issue":"140","key":"ref48","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"ref50","volume-title":"Raspberry Pi 4 Computer Model B","year":"2021"},{"key":"ref51","volume-title":"pyRAPL","year":"2021"},{"key":"ref52","article-title":"Efficient content-based sparse attention with routing transformers","author":"Roy","year":"2020","journal-title":"arXiv:2003.05997"},{"key":"ref53","article-title":"Scale-sim: Systolic cnn accelerator simulator","author":"Samajdar","year":"2018","journal-title":"arXiv:1811.02883"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00194"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"ref56","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"Proc. 3rd Int. Conf. Learn. Represent. (ICLR)","author":"Simonyan"},{"key":"ref57","first-page":"1","article-title":"Verilator and SystemPerl","volume-title":"Proc. North Amer. Syste. Users\u2019 Group (NASCUG) Meeting Des. Autom. Conf.","author":"Snyder"},{"key":"ref58","first-page":"455","article-title":"Parsing with compositional vector grammars","volume-title":"Proc. EMNLP","author":"Socher"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00086"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01625"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00756"},{"key":"ref62","first-page":"9438","article-title":"Sparse Sinkhorn attention","volume-title":"Proc. 37th Int. Conf. Mach. Learn. (ICML)","volume":"119","author":"Tay"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/3530811"},{"key":"ref64","article-title":"Training data-efficient image transformers & distillation through attention","author":"Touvron","year":"2020","journal-title":"arXiv:2012.12877"},{"key":"ref65","first-page":"5998","article-title":"Attention is all you need","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Vaswani"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/w18-5446"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.686"},{"key":"ref68","article-title":"SpAtten: Efficient sparse attention architecture with cascade token and head pruning","author":"Wang","year":"2020","journal-title":"arXiv:2012.09852"},{"key":"ref69","volume-title":"Virtex Ultrascale+ HBM FPGA: A Revolutionary Increase in Memory Performance","year":"2021"},{"key":"ref70","first-page":"5754","article-title":"Xlnet: Generalized autoregressive pretraining for language understanding","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yang"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/EMC2-NIPS53020.2019.00016"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.37"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17325"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00011"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586181"},{"key":"ref78","article-title":"Deformable DETR: Deformable transformers for end-to-end object detection","author":"Zhu","year":"2020","journal-title":"arXiv:2010.04159"}],"container-title":["IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/43\/9996095\/09763839.pdf?arnumber=9763839","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T22:54:42Z","timestamp":1705964082000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9763839\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1]]},"references-count":78,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tcad.2022.3170848","relation":{},"ISSN":["0278-0070","1937-4151"],"issn-type":[{"value":"0278-0070","type":"print"},{"value":"1937-4151","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,1]]}}}