{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T18:49:25Z","timestamp":1769280565251,"version":"3.49.0"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Innovation Technology Fund Mid-Stream Research Program","award":["ITS\/018\/22MS"],"award-info":[{"award-number":["ITS\/018\/22MS"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. I"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tcsi.2025.3549060","type":"journal-article","created":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T17:48:18Z","timestamp":1742320098000},"page":"2509-2519","source":"Crossref","is-referenced-by-count":2,"title":["Topkima-Former: Low-Energy, Low-Latency Inference for Transformers Using Top-<i>k<\/i> In-Memory ADC"],"prefix":"10.1109","volume":"72","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-4807-5094","authenticated-orcid":false,"given":"Shuai","family":"Dong","sequence":"first","affiliation":[{"name":"Department of Electrical Engineering, City University of Hong Kong, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5867-4943","authenticated-orcid":false,"given":"Junyi","family":"Yang","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, City University of Hong Kong, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7716-5318","authenticated-orcid":false,"given":"Xiaoqi","family":"Peng","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, City University of Hong Kong, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6276-1947","authenticated-orcid":false,"given":"Hongyang","family":"Shang","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, City University of Hong Kong, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9809-1192","authenticated-orcid":false,"given":"Ye","family":"Ke","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, City University of Hong Kong, Kowloon, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1400-7994","authenticated-orcid":false,"given":"Xiaofeng","family":"Yang","sequence":"additional","affiliation":[{"name":"Reexen Technology, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0904-5736","authenticated-orcid":false,"given":"Hongjie","family":"Liu","sequence":"additional","affiliation":[{"name":"Reexen Technology, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1035-8770","authenticated-orcid":false,"given":"Arindam","family":"Basu","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, City University of Hong Kong, Kowloon, Hong 
Kong"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref2","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2023.3315060"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2024.3426653"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2023.3338378"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD57390.2023.10323836"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530585"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2021.3064189"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2021.3138057"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1126\/science.abj9979"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/s41928-020-0435-7"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2023.3282046"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2020.3043731"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CICC48029.2020.9075883"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586134"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2022.3175534"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/NORCHIP.2014.7004740"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300014827"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071081"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3162602"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2022.3152653"},{"key":"ref22","first-page":"5506","article-title":"I-BERT: Integer-only BERT quantization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3299874.3317988"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.163"},{"key":"ref26","first-page":"1614","article-title":"From softmax to sparsemax: A sparse model of attention and multi-label classification","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Martins"},{"key":"ref27","article-title":"Sparse-softmax: A simpler and faster alternative softmax transformation","author":"Sun","year":"2021","journal-title":"arXiv:2112.12433"},{"key":"ref28","first-page":"1","article-title":"ReTransformer: ReRAM-based processing-in-memory architecture for transformer acceleration","volume-title":"Proc. 39th Int. Conf. 
Comput.-Aided Design","author":"Yang"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3213542"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2019.2907488"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2014.2342715"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20870-7_7"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3583781.3590259"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3045029"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2023.3337777"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1063\/5.0222533"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757460"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2011.2123630"}],"container-title":["IEEE Transactions on Circuits and Systems I: Regular Papers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8919\/11018065\/10931119.pdf?arnumber=10931119","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,30]],"date-time":"2025-05-30T05:28:15Z","timestamp":1748582895000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10931119\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":39,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tcsi.2025.3549060","relation":{},"ISSN":["1549-8328","1558-0806"],"issn-type":[{"value":"1549-8328","type":"print"},{"value":"1558-0806","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}
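
The record above is a Crossref "work" message for DOI 10.1109/tcsi.2025.3549060. As a minimal sketch, not part of the record itself, the same metadata could be fetched and summarized as shown below; it assumes the public Crossref REST API at api.crossref.org and the third-party requests package, and the field names follow the record shown above.

# Minimal sketch (illustrative, not from the record): fetch this Crossref
# "work" message and print a short citation summary.
# Assumes the public Crossref REST API (https://api.crossref.org) and the
# third-party `requests` package.
import requests

DOI = "10.1109/tcsi.2025.3549060"

resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]          # the "message" object is the work record shown above

title = work["title"][0]               # "Topkima-Former: ..."
journal = work["container-title"][0]   # "IEEE Transactions on Circuits and Systems I: Regular Papers"
volume, issue, pages = work["volume"], work["issue"], work["page"]
authors = [f'{a["given"]} {a["family"]}' for a in work["author"]]
refs_with_doi = sum(1 for r in work.get("reference", []) if "DOI" in r)

print(title)
print(", ".join(authors))
print(f"{journal}, vol. {volume}, no. {issue}, pp. {pages}")
print(f"{work['references-count']} references ({refs_with_doi} resolved to DOIs)")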