{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:53:41Z","timestamp":1775066021803,"version":"3.50.1"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T00:00:00Z","timestamp":1754438400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,6]],"date-time":"2025-08-06T00:00:00Z","timestamp":1754438400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,6]]},"DOI":"10.1109\/islped65674.2025.11261776","type":"proceedings-article","created":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T18:39:13Z","timestamp":1764787153000},"page":"1-7","source":"Crossref","is-referenced-by-count":1,"title":["Hybrid Systolic Array Accelerator with Optimized Dataflow for Edge Large Language Model Inference"],"prefix":"10.1109","author":[{"given":"Chun-Ting","family":"Chen","sequence":"first","affiliation":[{"name":"Cornell Tech,School of Electrical and Computer Engineering"}]},{"given":"HanGyeol","family":"Mun","sequence":"additional","affiliation":[{"name":"Cornell Tech,School of Electrical and Computer Engineering"}]},{"given":"Jian","family":"Meng","sequence":"additional","affiliation":[{"name":"Cornell Tech,School of Electrical and Computer Engineering"}]},{"given":"Mohamed S.","family":"Abdelfattah","sequence":"additional","affiliation":[{"name":"Cornell Tech,School of Electrical and Computer Engineering"}]},{"given":"Jae-sun","family":"Seo","sequence":"additional","affiliation":[{"name":"Cornell Tech,School of Electrical and Computer Engineering"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Phi-3 technical report: A highly capable language model locally on your phone","author":"Abdin","year":"2024"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.58"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655936"},{"key":"ref4","first-page":"873","article-title":"Vs-quant: Per-vector scaled quantization for accurate low-precision neural network inference","volume-title":"Proceedings of Machine Learning and Systems","volume":"3","author":"Dai"},{"key":"ref5","article-title":"Transformers are ssms: generalized models and efficient algorithms through structured state space duality","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Dao"},{"key":"ref6","article-title":"The llama 3 herd of models","author":"Dubey","year":"2024"},{"key":"ref7","article-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers","author":"Frantar","year":"2022"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED58423.2023.10244348"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2023.3234893"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/DAC.2018.8465897"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC49657.2024.10454330"},{"key":"ref12","article-title":"Qserve: W4a8kv4 quantization and system co-design for efficient llm serving","author":"Lin","year":"2024"},{"key":"ref13","first-page":"1002","article-title":"Bucket getter: A bucket-based processing engine for low-bit block floating point (bfp) dnns","volume-title":"Proceedings of the 56th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Lo"},{"key":"ref14","article-title":"Pointer sentinel mixture models","author":"Merity","year":"2016"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42615.2023.10067615"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K16-1028"},{"key":"ref17","article-title":"Nvidia jetson orin nano"},{"key":"ref18","first-page":"41","article-title":"Fine-grained dram: Energy-efficient dram for extreme bandwidth systems","volume-title":"Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"O\u2019Connor"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589057"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00079"},{"key":"ref21","article-title":"Microscaling data formats for deep learning","author":"Rouhani","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063"},{"key":"ref23","article-title":"Retentive network: A successor to transformer for large language models","author":"Sun","year":"2023"},{"key":"ref24","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD57390.2023.10323725"},{"key":"ref26","first-page":"38 087","article-title":"Smoothquant: Accurate and efficient post-training quantization for large language models","volume-title":"International Conference on Machine Learning","author":"Xiao"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00082"},{"key":"ref28","article-title":"Atom: Low-bit quantization for efficient and accurate llm serving","volume-title":"Proceedings of Machine Learning and Systems (MLSys)","author":"Zhao"}],"event":{"name":"2025 IEEE\/ACM International Symposium on Low Power Electronics and Design (ISLPED)","location":"Reykjav\u00edk, Iceland","start":{"date-parts":[[2025,8,6]]},"end":{"date-parts":[[2025,8,8]]}},"container-title":["2025 IEEE\/ACM International Symposium on Low Power Electronics and Design (ISLPED)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11261694\/11261695\/11261776.pdf?arnumber=11261776","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T07:49:54Z","timestamp":1764834594000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11261776\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,6]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/islped65674.2025.11261776","relation":{},"subject":[],"published":{"date-parts":[[2025,8,6]]}}}