{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T10:28:30Z","timestamp":1771064910396,"version":"3.50.1"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62495102,92464104"],"award-info":[{"award-number":["62495102,92464104"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.23919\/date64628.2025.10993079","type":"proceedings-article","created":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T13:36:35Z","timestamp":1747834595000},"page":"1-7","source":"Crossref","is-referenced-by-count":6,"title":["LightMamba: Efficient Mamba Acceleration on FPGA with Quantization and Hardware Co-design"],"prefix":"10.23919","author":[{"given":"Renjie","family":"Wei","sequence":"first","affiliation":[{"name":"Institute for Artificial Intelligence"}]},{"given":"Songqiang","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Software and Microelectronics, Peking University,Beijing,China"}]},{"given":"Linfeng","family":"Zhong","sequence":"additional","affiliation":[{"name":"School of Electronic and Computer Engineering, Peking University,Shenzhen,China"}]},{"given":"Zebin","family":"Yang","sequence":"additional","affiliation":[{"name":"Institute for Artificial Intelligence"}]},{"given":"Qingyu","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Peking University,Beijing,China"}]},{"given":"Yuan","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Peking University,Beijing,China"}]},{"given":"Runsheng","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Integrated Circuits, Peking University,Beijing,China"}]},{"given":"Meng","family":"Li","sequence":"additional","affiliation":[{"name":"Institute for Artificial Intelligence"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref2","article-title":"Transformers are ssms: Generalized models and efficient algorithms through structured state space duality","author":"Dao","year":"2024","journal-title":"arXiv preprint"},{"key":"ref3","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"arXiv preprint"},{"key":"ref4","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv preprint"},{"key":"ref5","article-title":"Sparks of artificial general intelligence: Early experiments with gpt-4","author":"Bubeck","year":"2023","journal-title":"arXiv preprint"},{"key":"ref6","article-title":"Mixtral of experts","author":"Jiang","year":"2024","journal-title":"arXiv preprint"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3626202.3637562"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00051"},{"key":"ref9","first-page":"38087","article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","volume-title":"International Conference on Machine Learning","author":"Xiao","year":"2023"},{"key":"ref10","first-page":"17402","article-title":"Outlier suppression: Pushing the limit of low-bit transformer language models","volume":"35","author":"Wei","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref11","article-title":"Outlier suppression+: Accurate quantization of large language models by equivalent and optimal shifting and scaling","author":"Wei","year":"2023","journal-title":"arXiv preprint"},{"key":"ref12","article-title":"Evaluating quantized large language models","author":"Li","year":"2024","journal-title":"arXiv preprint"},{"key":"ref13","article-title":"Mamba-ptq: Outlier channels in recurrent large language models","author":"Pierro","year":"2024","journal-title":"arXiv preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3676536.3676798"},{"key":"ref15","volume":"07339","author":"Dettmers","year":"2022","journal-title":"Llm. int8 (): 8-bit matrix multiplication for transformers at scale. corr abs\/2208"},{"key":"ref16","article-title":"Quarot: Outlier-free 4-bit inference in rotated llms","author":"Ashkboos","year":"2024","journal-title":"arXiv preprint"},{"key":"ref17","article-title":"Spinquant-llm quantization with learned rotations","author":"Liu","year":"2024","journal-title":"arXiv preprint"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3649476.3658810"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3656177"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3676536.3676681"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3370748.3406567"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00050"},{"key":"ref23","article-title":"Deep learning with int8 optimization on xilinx devices","author":"Fu","year":"2016","journal-title":"White Paper"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TC.1976.1674569"},{"issue":"8","key":"ref25","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref26","article-title":"Hellaswag: Can a machine really finish your sentence?","author":"Zellers","year":"2019","journal-title":"arXiv preprint"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6239"},{"key":"ref28","article-title":"Think you have solved question answering? try arc, the ai2 reasoning challenge","author":"Clark","year":"2018","journal-title":"arXiv preprint"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3474381"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1260"},{"key":"ref31","volume-title":"A framework for few-shot language model evaluation","author":"Gao","year":"2024"},{"key":"ref32","article-title":"Pointer sentinel mixture models","author":"Merity","year":"2016","journal-title":"arXiv preprint"}],"event":{"name":"2025 Design, Automation &amp; Test in Europe Conference (DATE)","location":"Lyon, France","start":{"date-parts":[[2025,3,31]]},"end":{"date-parts":[[2025,4,2]]}},"container-title":["2025 Design, Automation &amp; Test in Europe Conference (DATE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10992638\/10992588\/10993079.pdf?arnumber=10993079","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T18:47:35Z","timestamp":1762454855000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10993079\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":32,"URL":"https:\/\/doi.org\/10.23919\/date64628.2025.10993079","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]}}}