{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,24]],"date-time":"2026-06-24T15:07:52Z","timestamp":1782313672135,"version":"3.54.5"},"reference-count":26,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2022ZD0115200"],"award-info":[{"award-number":["2022ZD0115200"]}],"id":[{"id":"10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62125403"],"award-info":[{"award-number":["62125403"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["92464302"],"award-info":[{"award-number":["92464302"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U24B20164"],"award-info":[{"award-number":["U24B20164"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["92164301"],"award-info":[{"award-number":["92164301"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Municipal Science and Technology Major Project"},{"name":"Natural Science Foundation of Jiangsu Province Basic Research Program","award":["BK20243042"],"award-info":[{"award-number":["BK20243042"]}]},{"DOI":"10.13039\/501100001809","name":"Beijing National Research Center for Information Science and Technology","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Northern IC Technology Innovation Center (Beijing) Company Ltd","award":["QYJS20232801B"],"award-info":[{"award-number":["QYJS20232801B"]}]},{"name":"Beijing Advanced Innovation Center for Integrated Circuits"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. II"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tcsii.2025.3596228","type":"journal-article","created":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T18:02:42Z","timestamp":1754503362000},"page":"1433-1437","source":"Crossref","is-referenced-by-count":9,"title":["BETA: A Bit-Grained Transformer Attention Accelerator With Efficient Early Termination"],"prefix":"10.1109","volume":"72","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9763-8208","authenticated-orcid":false,"given":"Huizheng","family":"Wang","sequence":"first","affiliation":[{"name":"School of Integrated Circuits, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hongbin","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4084-3478","authenticated-orcid":false,"given":"Zhiheng","family":"Yue","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6009-4232","authenticated-orcid":false,"given":"Jingyao","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5117-7920","authenticated-orcid":false,"given":"Taiquan","family":"Wei","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shaojun","family":"Wei","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6942-4395","authenticated-orcid":false,"given":"Yang","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2309-572X","authenticated-orcid":false,"given":"Shouyi","family":"Yin","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"LaMDA: Language models for dialog applications","author":"Thoppilan","year":"2022","journal-title":"arXiv:2201.08239"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00632"},{"key":"ref3","article-title":"Emergent abilities of large language models","author":"Wei","year":"2022","journal-title":"arXiv:2206.07682"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511105"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.nlp.2023.100048"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref7","article-title":"Full stack optimization of Transformer inference: A survey","author":"Ki","year":"2023","journal-title":"arXiv:2302.14017"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.3390\/electronics12102299"},{"key":"ref9","article-title":"Large language model inference acceleration: A comprehensive hardware perspective","author":"Li","year":"2024","journal-title":"arXiv:2410.04466"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01306"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/hpca47549.2020.00035"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2022.3170848"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507738"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589057"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO61859.2024.00093"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480125"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527423"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655936"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/tcsi.2025.3576232"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/NORCHIP.2014.7004740"},{"key":"ref23","article-title":"OPT: Open pre-trained Transformer language models","author":"Zhang","year":"2022","journal-title":"arXiv:2205.01068"},{"key":"ref24","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv:2307.09288"},{"key":"ref25","article-title":"Pointer sentinel mixture models","author":"Merity","year":"2016","journal-title":"arXiv:1609.07843"},{"key":"ref26","volume-title":"Free Dolly: Introducing the World\u2019s First Truly Open Instruction-Tuned LLM","author":"Conove","year":"2023"}],"container-title":["IEEE Transactions on Circuits and Systems II: Express Briefs"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8920\/11180172\/11117182.pdf?arnumber=11117182","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T19:07:25Z","timestamp":1764184045000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11117182\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":26,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tcsii.2025.3596228","relation":{},"ISSN":["1549-7747","1558-3791"],"issn-type":[{"value":"1549-7747","type":"print"},{"value":"1558-3791","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}