{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,28]],"date-time":"2025-08-28T05:10:19Z","timestamp":1756357819160,"version":"3.44.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T00:00:00Z","timestamp":1751760000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T00:00:00Z","timestamp":1751760000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,6]]},"DOI":"10.1109\/isvlsi65124.2025.11130225","type":"proceedings-article","created":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T18:20:15Z","timestamp":1756318815000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Stella Nera: A Differentiable Maddness-Based Hardware Accelerator for Efficient Approximate Matrix Multiplication"],"prefix":"10.1109","author":[{"given":"Jannis","family":"Sch\u00f6nleber","sequence":"first","affiliation":[{"name":"Integrated Systems Laboratory ETH Zurich"}]},{"given":"Lukas","family":"Cavigelli","sequence":"additional","affiliation":[{"name":"Computing Systems Lab Zurich Research Center, Huawei Technologies"}]},{"given":"Matteo","family":"Perotti","sequence":"additional","affiliation":[{"name":"Integrated Systems Laboratory ETH Zurich"}]},{"given":"Luca","family":"Benini","sequence":"additional","affiliation":[{"name":"University of Bologna,Integrated Systems Laboratory ETH Zurich"}]},{"given":"Renzo","family":"Andri","sequence":"additional","affiliation":[{"name":"Computing Systems Lab Zurich Research Center, Huawei Technologies"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42613.2021.9366060"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2019.8875654"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247785"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247662"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247855"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530422"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC55821.2022.9926331"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2021.3075420"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2018.8351807"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS51556.2021.9401214"},{"article-title":"Multiplying matrices without multiplying","volume-title":"International Conference on Machine Learning","author":"Blalock","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1166\/jolpe.2011.1157"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2487623"},{"key":"ref15","article-title":"Multiplying matrices without multiplying","volume":"abs\/2106.10860","author":"Blalock","year":"2021","journal-title":"CoRR"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3169\/mta.6.2"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098195"},{"key":"ref18","first-page":"1617","article-title":"Differentiable product quantization for end-to-end embedding compression","volume-title":"Proc. ICML","author":"Chen"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.23919\/DATE56975.2023.10137218"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3613285"},{"key":"ref21","article-title":"Look-ups are not (yet) all you need for deep learning inference","author":"McCarter","year":"2022","journal-title":"arXiv preprint arXiv:2207.05808"},{"article-title":"Slide: In defense of smart algorithms over hardware acceleration for large-scale deep learning systems","volume-title":"Proc. MLSys","author":"Chen","key":"ref22"},{"article-title":"Compressing neural networks with the hashing trick","year":"2015","author":"Chen","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098035"},{"article-title":"Are we there yet? product quantization and its hardware acceleration","year":"2023","author":"Fernandez-Marques","key":"ref25"},{"article-title":"Yet another representation of binary decision trees: A mathematical demonstration","year":"2022","author":"Zhang","key":"ref26"},{"article-title":"Estimating or propagating gradients through stochastic neurons for conditional computation","year":"2013","author":"Bengio","key":"ref27"},{"article-title":"High performance convolutional neural networks for document processing","volume-title":"Tenth international workshop on frontiers in handwriting recognition","author":"Chellapilla","key":"ref28"},{"key":"ref29","doi-asserted-by":"crossref","DOI":"10.1109\/ISSCC42614.2022.9731639","article-title":"A multi-mode 8 k-mac hw-utilization-aware neural processing unit with a unified multi-precision datapath in 4 nm flagship mobile soc","volume-title":"Proc. IEEE ISSCC","author":"Park"},{"article-title":"A 3.4-to-13.3 tops\/w 3.6 tops dual-core deep-learning accelerator for versatile ai applications in 7 nm 5 g smartphone soc","volume-title":"Proc. IEEE ISSCC","author":"Lin","key":"ref30"},{"key":"ref31","first-page":"144","article-title":"A 7 nm 4-core ai chip with 25.6 tflops hybrid fp.8 training, 102.4 tops int4 inference and workload-aware throttling","volume-title":"Proc. IEEE ISSCC","volume":"64","author":"Agrawal"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1038\/s41928-023-01010-1"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/VLSIC.2018.8502333"},{"key":"ref34","doi-asserted-by":"crossref","DOI":"10.23919\/DATE54114.2022.9774759","article-title":"Redmule: A compact fp.16 matrix-multiplication accelerator for adaptive deep learning on risc-v-based ultra-low-power socs","volume-title":"Proc. IEEE DATE","author":"Tortorella"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS51556.2021.9401196"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/tvlsi.2020.3044752"}],"event":{"name":"2025 IEEE Computer Society Annual Symposium on VLSI (ISVLSI)","start":{"date-parts":[[2025,7,6]]},"location":"Kalamata, Greece","end":{"date-parts":[[2025,7,9]]}},"container-title":["2025 IEEE Computer Society Annual Symposium on VLSI (ISVLSI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11129697\/11130193\/11130225.pdf?arnumber=11130225","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,28]],"date-time":"2025-08-28T04:31:18Z","timestamp":1756355478000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11130225\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,6]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/isvlsi65124.2025.11130225","relation":{},"subject":[],"published":{"date-parts":[[2025,7,6]]}}}