{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T05:57:33Z","timestamp":1781157453626,"version":"3.54.1"},"reference-count":56,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T00:00:00Z","timestamp":1778630400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,13]],"date-time":"2026-05-13T00:00:00Z","timestamp":1778630400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003803","name":"University of Hong Kong","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003803","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,5,13]]},"DOI":"10.1109\/fccm68464.2026.00025","type":"proceedings-article","created":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T19:59:45Z","timestamp":1781121585000},"page":"90-99","source":"Crossref","is-referenced-by-count":0,"title":["ViM-Q: Scalable Algorithm-Hardware Co-Design for Vision Mamba Model Inference on FPGA"],"prefix":"10.1109","author":[{"given":"Shengzhe","family":"Lyu","sequence":"first","affiliation":[{"name":"City University of Hong Kong,Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuhan","family":"She","sequence":"additional","affiliation":[{"name":"City University of Hong Kong,Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Patrick S. Y.","family":"Hung","sequence":"additional","affiliation":[{"name":"City University of Hong Kong,Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ray C. C.","family":"Cheung","sequence":"additional","affiliation":[{"name":"City University of Hong Kong,Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weitao","family":"Xu","sequence":"additional","affiliation":[{"name":"City University of Hong Kong,Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Dosovitskiy"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref4","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Touvron"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref6","first-page":"13937","article-title":"Dynamicvit: Efficient vision transformers with dynamic token sparsification","volume":"34","author":"Rao","year":"2021","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref7","article-title":"BEit: BERT pre-training of image transformers","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Bao"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3505244"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3152247"},{"key":"ref11","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","volume-title":"Conference on Language Modeling (COLM)","author":"Gu"},{"key":"ref12","first-page":"10041","article-title":"Transformers are ssms: generalized models and efficient algorithms through structured state space duality","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Dao"},{"key":"ref13","article-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Zhu"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3273"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00423"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02352"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00424"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-91979-4_2"},{"key":"ref19","article-title":"Mamba in vision: A comprehensive survey of techniques and applications","author":"Rahman","year":"2024"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2025.3610435"},{"key":"ref21","article-title":"A survey on mamba architecture for vision applications","author":"Ibrahim","year":"2025"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00122"},{"key":"ref23","article-title":"Mamba-ptq: Outlier channels in recurrent large language models","author":"Pierro","year":"2024"},{"key":"ref24","article-title":"Mambaquant: Quantizing the mamba family with variance aligned rotation methods","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Xu"},{"key":"ref25","article-title":"Quamba: A post-training quantization recipe for selective state space models","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Chiang"},{"key":"ref26","article-title":"Quamba2: A robust and scalable post-training quantization framework for selective state space models","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Chiang"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP62443.2025.11204250"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.02273"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51701.2025.01967"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3695053.3731057"},{"key":"ref31","first-page":"873","article-title":"Vs-quant: Per-vector scaled quantization for accurate low-precision neural network inference","volume":"3","author":"Dai","year":"2021","journal-title":"Machine Learning and Systems (MLSys)"},{"key":"ref32","first-page":"307","article-title":"Ladder: Enabling efficient low-precision deep learning computing through hardware-aware tensor transformation","volume-title":"Proceedings of the USENIX Symposium on Operating Systems Design and Implementation (OSDI)","author":"Wang"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3696099"},{"key":"ref34","first-page":"87","article-title":"Awq: Activation-aware weight quantization for on-device llm compression and acceleration","volume":"6","author":"Lin","year":"2024","journal-title":"Machine Learning and Systems (MLSys)"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3676536.3676798"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.23919\/DATE64628.2025.10993079"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM62733.2025.00078"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD66269.2025.11240777"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD66269.2025.11240945"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI65124.2025.11130250"},{"key":"ref41","first-page":"38087","article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","volume-title":"Proceedings of the International Conference on Machine Learning (ICML)","author":"Xiao"},{"key":"ref42","article-title":"SVDQuant: Absorbing outliers by low-rank component for 4-bit diffusion models","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Li"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00027"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2024.3422684"},{"key":"ref45","article-title":"Additive powers-of-two quantization: An efficient non-uniform discretization for neural networks","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Li"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/219"},{"key":"ref47","article-title":"Power-of-two quantization for low bitwidth and hardware compliant neural networks","author":"Przewlocka-Rus","year":"2022"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1970"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.52202\/068431-2198"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD57390.2023.10323651"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021744"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICFPT64416.2024.11113430"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00057"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00085"},{"key":"ref55","article-title":"Lut-gemm: Quantized matrix multiplication based on luts for efficient inference in large-scale generative language models","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR)","author":"Park"},{"key":"ref56","article-title":"Prefix sums and their applications","author":"Blelloch","year":"1990"}],"event":{"name":"2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","location":"Atlanta, GA, USA","start":{"date-parts":[[2026,5,13]]},"end":{"date-parts":[[2026,5,16]]}},"container-title":["2026 IEEE 34th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11552597\/11552602\/11552664.pdf?arnumber=11552664","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T05:05:01Z","timestamp":1781154301000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11552664\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,13]]},"references-count":56,"URL":"https:\/\/doi.org\/10.1109\/fccm68464.2026.00025","relation":{},"subject":[],"published":{"date-parts":[[2026,5,13]]}}}