{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T05:52:52Z","timestamp":1763704372273,"version":"3.45.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,26]],"date-time":"2025-10-26T00:00:00Z","timestamp":1761436800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,26]]},"DOI":"10.1109\/iccad66269.2025.11240811","type":"proceedings-article","created":{"date-parts":[[2025,11,20]],"date-time":"2025-11-20T18:39:34Z","timestamp":1763663974000},"page":"1-9","source":"Crossref","is-referenced-by-count":0,"title":["ToMamba: Towards Token-Efficient Mamba Architecture on FPGA"],"prefix":"10.1109","author":[{"given":"Kejia","family":"Shi","sequence":"first","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"}]},{"given":"Yue","family":"Cao","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"}]},{"given":"Yuhang","family":"Du","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"}]},{"given":"Jianli","family":"Chen","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"}]},{"given":"Jun","family":"Yu","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"}]},{"given":"Kun","family":"Wang","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n19\u20131423"},{"article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","year":"2020","author":"Dosovitskiy","key":"ref3"},{"article-title":"Efficiently modeling long sequences with structured state spaces","year":"2021","author":"Gu","key":"ref4"},{"article-title":"Mamba: Linear-time sequence modeling with selective state spaces","year":"2023","author":"Gu","key":"ref5"},{"article-title":"Token merging: Your vit but faster","year":"2022","author":"Bolya","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.721"},{"key":"ref8","first-page":"13 937","article-title":"Dynamicvit: Efficient vision transformers with dynamic token sparsification","volume":"34","author":"Rao","year":"2021","journal-title":"Advances in neural information processing 
systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655936"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655982"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2018.2815603"},{"key":"ref12","first-page":"363","article-title":"Optimus: Optimized matrix multiplication structure for transformer neural network accelerator","volume-title":"Proceedings of Machine Learning and Systems","volume":"2","author":"Park"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD57390.2023.10323836"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586295"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2022.3197282"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3656497"},{"key":"ref17","first-page":"38 087","article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Xiao"},{"key":"ref18","first-page":"87","article-title":"Awq: Activation-aware weight quantization for on-device llm compression and acceleration","volume-title":"Proceedings of Machine Learning and Systems","volume":"6","author":"Lin"},{"article-title":"Evaluating quantized large language models","year":"2024","author":"Li","key":"ref19"},{"article-title":"Q-s5: Towards quantized state space models","year":"2024","author":"Abreu","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3658498"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655986"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/FPL64840.2024.00041"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"article-title":"Evit: Expediting vision transformers via token reorganizations","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Liang","key":"ref26"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.100"},{"key":"ref28","first-page":"30 318","article-title":"Gpt3. int8 (): 8-bit matrix multiplication for transformers at scale","volume":"35","author":"Dettmers","year":"2022","journal-title":"Advances in neural information processing systems"},{"key":"ref29","first-page":"17 402","article-title":"Outlier suppression: Pushing the limit of low-bit transformer language models","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.102"},{"article-title":"Mamba-ptq: Outlier channels in recurrent large language models","year":"2024","author":"Pierro","key":"ref31"},{"article-title":"Q-s5: Towards quantized state space models","year":"2024","author":"Abreu","key":"ref32"},{"article-title":"Mambaquant: Quantizing the mamba family with variance aligned rotation methods","year":"2025","author":"Xu","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.23919\/DATE64628.2025.10993079"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TEC.1959.5222693"},{"article-title":"Think you have solved question answering? 
{"article-title":"Think you have solved question answering? try arc, the ai2 reasoning challenge","year":"2018","author":"Clark","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1472"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3474381"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3676536.3676798"}],"event":{"name":"2025 IEEE\/ACM International Conference On Computer Aided Design (ICCAD)","start":{"date-parts":[[2025,10,26]]},"location":"Munich, Germany","end":{"date-parts":[[2025,10,30]]}},"container-title":["2025 IEEE\/ACM International Conference On Computer Aided Design (ICCAD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11240608\/11240621\/11240811.pdf?arnumber=11240811","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T05:44:40Z","timestamp":1763703880000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11240811\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,26]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/iccad66269.2025.11240811","relation":{},"subject":[],"published":{"date-parts":[[2025,10,26]]}}}
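
The object above is a standard Crossref REST API work record. As a minimal, illustrative sketch (not part of the record itself), the Python snippet below fetches the same record by its DOI from the public api.crossref.org endpoint and prints a short citation; the field names mirror the "message" object shown above, and the use of the requests library is an assumption.

# Sketch: retrieve a Crossref work record and print a one-line citation.
# Assumes network access and the public Crossref REST API (api.crossref.org).
import requests

DOI = "10.1109/iccad66269.2025.11240811"  # DOI taken from the record above

resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # payload shape matches the "message" object above

authors = ", ".join(
    f"{a.get('given', '')} {a.get('family', '')}".strip()
    for a in work.get("author", [])
)
title = work["title"][0] if work.get("title") else ""
venue = work["container-title"][0] if work.get("container-title") else ""
year = work["issued"]["date-parts"][0][0]

print(f'{authors}. "{title}." {venue}, {year}. doi:{work["DOI"]}')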