{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T06:35:29Z","timestamp":1763447729242,"version":"3.45.0"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T00:00:00Z","timestamp":1759104000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,9,29]]},"DOI":"10.1109\/socc66126.2025.11235364","type":"proceedings-article","created":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T18:39:03Z","timestamp":1763404743000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["SIMAX: a SIMD-Based Many-Core Accelerator for Matrix-Vector Multiplication for Transformers"],"prefix":"10.1109","author":[{"given":"Yue","family":"Yang","sequence":"first","affiliation":[{"name":"California State University,Electrical and Computer Engineering,Fresno Fresno,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John","family":"Herrera","sequence":"additional","affiliation":[{"name":"California State University,Electrical and Computer Engineering,Fresno Fresno,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aaron","family":"Stillmaker","sequence":"additional","affiliation":[{"name":"California State University,Electrical and Computer Engineering,Fresno Fresno,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Attention is all you need","year":"2023","author":"Vaswani","key":"ref1"},{"key":"ref2","article-title":"Freepdk45: An open-source predictive 45nm pdk"},{"key":"ref3","first-page":"74","article-title":"Scaling equations for the accurate prediction of cmos device performance from 180nm to 7nm","volume-title":"Integration","volume":"58","author":"Stillmaker","year":"2017"},{"article-title":"Efficient transformers: A survey","year":"2022","author":"Tay","key":"ref4"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2019","author":"Devlin","key":"ref5"},{"article-title":"I-bert: Integer-only bert quantization","year":"2021","author":"Kim","key":"ref6"},{"article-title":"Quantization and training of neural networks for efficient integer-arithmetic-only inference","year":"2017","author":"Jacob","key":"ref7"},{"article-title":"Integer quantization for deep learning inference: Principles and empirical evaluation","year":"2020","author":"Wu","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/EMC2-NIPS53020.2019.00016"},{"issue":"4","key":"ref10","first-page":"970","article-title":"A 32-nm 1000-processor array with 0.56-2.8 v operation, 361 mw\/mm$\\hat 2$ throughput density and 3.6 pj\/inst","volume":"52","author":"Bai","year":"2017","journal-title":"IEEE Journal of Solid-State Circuits"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICDIS50059.2020.00025"},{"key":"ref12","doi-asserted-by":"crossref","DOI":"10.1145\/3079856.3080254","article-title":"Scnn: An accelerator for compressed-sparse convolutional neural networks","author":"Parashar","year":"2017"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2016.7418007"},{"key":"ref14","doi-asserted-by":"crossref","DOI":"10.1109\/JETCAS.2019.2910232","article-title":"Eyeriss v2: A flexible accelerator for emerging deep neural networks on mobile devices","author":"Chen","year":"2019"},{"author":"Das","key":"ref15","article-title":"Nvidia jetson nano benchmark"},{"key":"ref16","article-title":"Jetson xavier nx"},{"key":"ref17","article-title":"Edge tpu ml accelerator"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.1145\/3579371.3589350","article-title":"Tpu v4: An optically reconfigurable supercomputer for machine learning with hardware support for embeddings","author":"Jouppi","year":"2023"},{"key":"ref19","article-title":"Apple m2"},{"key":"ref20","article-title":"Core\u2122 ultra 7 processor 155u"},{"key":"ref21","article-title":"Core\u2122 i7-11800h processor"}],"event":{"name":"2025 IEEE 38th International System-on-Chip Conference (SOCC)","start":{"date-parts":[[2025,9,29]]},"location":"Dubai, United Arab Emirates","end":{"date-parts":[[2025,10,1]]}},"container-title":["2025 IEEE 38th International System-on-Chip Conference (SOCC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11235309\/11235311\/11235364.pdf?arnumber=11235364","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T06:31:28Z","timestamp":1763447488000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11235364\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,29]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/socc66126.2025.11235364","relation":{},"subject":[],"published":{"date-parts":[[2025,9,29]]}}}