{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T22:41:34Z","timestamp":1773182494545,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":77,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2045985"],"award-info":[{"award-number":["CNS-2045985"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100001395","name":"Wisconsin Alumni Research Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100001395","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007900","name":"University of Central Florida","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100007900","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620665.3640364","type":"proceedings-article","created":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T14:18:06Z","timestamp":1713795486000},"page":"167-184","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Carat: Unlocking Value-Level Parallelism for Multiplier-Free GEMMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-5707-1137","authenticated-orcid":false,"given":"Zhewen","family":"Pan","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, WI, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6886-7183","authenticated-orcid":false,"given":"Joshua","family":"San Miguel","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Wisconsin-Madison, Madison, WI, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9775-8026","authenticated-orcid":false,"given":"Di","family":"Wu","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Central Florida, Orlando, FL, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.118318"},{"key":"e_1_3_2_1_2_1","volume-title":"Kuniaki Uehara. Deep Learning for Stock Prediction Using Numerical and Textual Information. In International Conference on Computer and Information Science","author":"Akita Ryo","year":"2016","unstructured":"Ryo Akita, Akira Yoshihara, Takashi Matsubara, and Kuniaki Uehara. Deep Learning for Stock Prediction Using Numerical and Textual Information. In International Conference on Computer and Information Science, 2016."},{"key":"e_1_3_2_1_3_1","volume-title":"Online","year":"2022","unstructured":"Arm. Arm supports FP8: A new 8-bit floating-point interchange format for Neural Network processing. Online, Sep 2022."},{"issue":"6","key":"e_1_3_2_1_4_1","first-page":"5591","article-title":"Medical Diagnosis Using Deep Learning Techniques","volume":"25","author":"Azad Mir Mohammad","year":"2021","unstructured":"Mir Mohammad Azad, Apoorva Ganapathy, Siddhartha Vadlamudi, and Harish Paruchuri. Medical Diagnosis Using Deep Learning Techniques: A Research Survey. Annals of the Romanian Society for Cell Biology, 25(6):5591--5600, 2021.","journal-title":"A Research Survey. Annals of the Romanian Society for Cell Biology"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.3390\/mti2030047"},{"key":"e_1_3_2_1_6_1","volume-title":"Jun","author":"Balasubramonian Rajeev","year":"2017","unstructured":"Rajeev Balasubramonian, Andrew B. Kahng, Naveen Muralimanohar, Ali Shafiee, and Vaishnav Srinivas. CACTI 7: New Tools for Interconnect Exploration in Innovative Off-Chip Memories. Transactions on Architecture and Code Optimization, 14(2), Jun 2017."},{"key":"e_1_3_2_1_7_1","volume-title":"Online","year":"2023","unstructured":"Chase. How Often is Your Credit Score Updated? Online, Sep 2023."},{"key":"e_1_3_2_1_8_1","first-page":"145","volume-title":"Tieke He. A Deep Learning Method for Judicial Decision Support. In International Conference on Software Quality, Reliability and Security Companion","author":"Chen Baogui","year":"2019","unstructured":"Baogui Chen, Yu Li, Shu Zhang, Hao Lian, and Tieke He. A Deep Learning Method for Judicial Decision Support. In International Conference on Software Quality, Reliability and Security Companion, pages 145--149, 2019."},{"key":"e_1_3_2_1_9_1","volume-title":"Vivienne Sze. Eyeriss: A Spatial Architecture for Energy-Efficient Dataflow for Convolutional Neural Networks. In International Symposium on Computer Architecture","author":"Chen Yu-Hsin","year":"2016","unstructured":"Yu-Hsin Chen, Joel Emer, and Vivienne Sze. Eyeriss: A Spatial Architecture for Energy-Efficient Dataflow for Convolutional Neural Networks. In International Symposium on Computer Architecture, 2016."},{"key":"e_1_3_2_1_10_1","volume-title":"Minsoo Rhu. Lazy Batching: An SLA-aware Batching System for Cloud Machine Learning Inference. In International Symposium on High-Performance Computer Architecture","author":"Choi Yujeong","year":"2021","unstructured":"Yujeong Choi, Yunseong Kim, and Minsoo Rhu. Lazy Batching: An SLA-aware Batching System for Cloud Machine Learning Inference. In International Symposium on High-Performance Computer Architecture, 2021."},{"key":"e_1_3_2_1_11_1","volume-title":"Jyrki Alakuijala. Temporal Coding in Spiking Neural Networks with Alpha Synaptic Function. In International Conference on Acoustics, Speech and Signal Processing","author":"Comsa Iulia M.","year":"2020","unstructured":"Iulia M. Comsa, Krzysztof Potempa, Luca Versari, Thomas Fischbacher, Andrea Gesmundo, and Jyrki Alakuijala. Temporal Coding in Spiking Neural Networks with Alpha Synaptic Function. In International Conference on Acoustics, Speech and Signal Processing, 2020."},{"key":"e_1_3_2_1_12_1","volume-title":"Ion Stoica. Clipper: A Low-Latency Online Prediction Serving System. In Symposium on Networked Systems Design and Implementation","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J. Franklin, Joseph E. Gonzalez, and Ion Stoica. Clipper: A Low-Latency Online Prediction Serving System. In Symposium on Networked Systems Design and Implementation, 2017."},{"key":"e_1_3_2_1_13_1","volume-title":"Ng. Large Scale Distributed Deep Networks. In International Conference on Neural Information Processing Systems","author":"Dean Jeffrey","year":"2012","unstructured":"Jeffrey Dean, Greg S. Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Quoc V. Le, Mark Z. Mao, Marc'Aurelio Ranzato, Andrew Senior, Paul Tucker, Ke Yang, and Andrew Y. Ng. Large Scale Distributed Deep Networks. In International Conference on Neural Information Processing Systems, 2012."},{"key":"e_1_3_2_1_14_1","volume-title":"International Conference on Architectural Support for Programming Languages and Operating Systems","author":"Lascorz Alberto Delmas","year":"2019","unstructured":"Alberto Delmas Lascorz, Patrick Judd, Dylan Malone Stuart, Zissis Poulos, Mostafa Mahmoud, Sayeh Sharify, Milos Nikolic, Kevin Siu, and Andreas Moshovos. Bit-Tactical: A Software\/Hardware Approach to Exploiting Value and Bit Sparsity in Neural Networks. In International Conference on Architectural Support for Programming Languages and Operating Systems, 2019."},{"key":"e_1_3_2_1_15_1","volume-title":"Li Fei-Fei. ImageNet: A Large-Scale Hierarchical Image Database. In Conference on Computer Vision and Pattern Recognition","author":"Deng Jia","year":"2009","unstructured":"Jia Deng, Wei Dong, Richard Socher, Li-Jia Li, Kai Li, and Li Fei-Fei. ImageNet: A Large-Scale Hierarchical Image Database. In Conference on Computer Vision and Pattern Recognition, 2009."},{"key":"e_1_3_2_1_16_1","volume-title":"Annual Conference of the North American","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina\" Toutanova. BERT: Pre-Training of Deep Bidirectional Transformers for Language Understanding. In Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2019."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2020.2971596"},{"key":"e_1_3_2_1_18_1","volume-title":"Accelerating Inference and Language Model Fusion of Recurrent Neural Network Transducers via End-to-End 4-bit Quantization. arXiv","author":"Fasoli Andrea","year":"2022","unstructured":"Andrea Fasoli, Chia-Yu Chen, Mauricio Serrano, Swagath Venkataramani, George Saon, Xiaodong Cui, Brian Kingsbury, and Kailash Gopalakrishnan. Accelerating Inference and Language Model Fusion of Recurrent Neural Network Transducers via End-to-End 4-bit Quantization. arXiv, 2022."},{"key":"e_1_3_2_1_19_1","volume-title":"Fuchs and David Wentzlaff. Scaling Datacenter Accelerators with Compute-Reuse Architectures. In International Symposium on Computer Architecture","author":"Adi","year":"2018","unstructured":"Adi Fuchs and David Wentzlaff. Scaling Datacenter Accelerators with Compute-Reuse Architectures. In International Symposium on Computer Architecture, 2018."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"e_1_3_2_1_21_1","volume-title":"Jinyang Li. Low Latency RNN Inference with Cellular Batching. In EuroSys Conference","author":"Gao Pin","year":"2018","unstructured":"Pin Gao, Lingfan Yu, Yongwei Wu, and Jinyang Li. Low Latency RNN Inference with Cellular Batching. In EuroSys Conference, 2018."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3229762.3229763"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Patricia Gonzalez-Guerrero Meriam Gay Bautista Darren Lyles and George Michelogiannakis. Temporal and SFQ Pulse-Streams Encoding for Area-Efficient Superconducting Accelerators. In International Conference on Architectural Support for Programming Languages and Operating Systems 2022.","DOI":"10.1145\/3503222.3507765"},{"key":"e_1_3_2_1_24_1","volume-title":"Nov","year":"2022","unstructured":"Google. System Architecture. Online, Nov 2022."},{"key":"e_1_3_2_1_25_1","volume-title":"Online","year":"2023","unstructured":"Google. Edge TPU Compiler. Online, Apr 2023."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2021.03.006"},{"key":"e_1_3_2_1_27_1","volume-title":"Jian Sun. Deep Residual Learning for Image Recognition. In Conference on Computer Vision and Pattern Recognition","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. Deep Residual Learning for Image Recognition. In Conference on Computer Vision and Pattern Recognition, 2016."},{"key":"e_1_3_2_1_28_1","volume-title":"Alexander Gruenstein. Streaming End-to-end Speech Recognition for Mobile Devices. In International Conference on Acoustics, Speech and Signal Processing","author":"He Yanzhang","year":"2019","unstructured":"Yanzhang He, Tara N. Sainath, Rohit Prabhavalkar, Ian McGraw, Raziel Alvarez, Ding Zhao, David Rybach, Anjuli Kannan, Yonghui Wu, Ruoming Pang, Qiao Liang, Deepti Bhatia, Yuan Shangguan, Bo Li, Golan Pundak, Khe Chai Sim, Tom Bagby, Shuo-yiin Chang, Kanishka Rao, and Alexander Gruenstein. Streaming End-to-end Speech Recognition for Mobile Devices. In International Conference on Acoustics, Speech and Signal Processing, 2019."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00062"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00050"},{"key":"e_1_3_2_1_31_1","volume-title":"Online","year":"2019","unstructured":"Intel. Neural Network Distiller. Online, Oct 2019."},{"key":"e_1_3_2_1_32_1","volume-title":"Seq","year":"2022","unstructured":"Intel. Cross-Industry Hardware Specification to Accelerate AI Software Development. Online, Seq 2022."},{"key":"e_1_3_2_1_33_1","volume-title":"Doe Hyun Yoon. In-Datacenter Performance Analysis of A Tensor Processing Unit. In International Symposium on Computer Architecture","author":"Jouppi Norman P.","year":"2017","unstructured":"Norman P. Jouppi, Cliff Young, Nishant Patil, David Patterson, Gaurav Agrawal, Raminder Bajwa, Sarah Bates, Suresh Bhatia, Nan Boden, Al Borchers, Rick Boyle, Pierre-luc Cantin, Clifford Chao, Chris Clark, Jeremy Coriell, Mike Daley, Matt Dau, Jeffrey Dean, Ben Gelb, Tara Vazir Ghaemmaghami, Rajendra Gottipati, William Gulland, Robert Hagmann, C. Richard Ho, Doug Hogberg, John Hu, Robert Hundt, Dan Hurt, Julian Ibarz, Aaron Jaffey, Alek Jaworski, Alexander Kaplan, Harshit Khaitan, Daniel Killebrew, Andy Koch, Naveen Kumar, Steve Lacy, James Laudon, James Law, Diemthu Le, Chris Leary, Zhuyuan Liu, Kyle Lucke, Alan Lundin, Gordon MacKean, Adriana Maggiore, Maire Mahony, Kieran Miller, Rahul Nagarajan, Ravi Narayanaswami, Ray Ni, Kathy Nix, Thomas Norrie, Mark Omernick, Narayana Penukonda, Andy Phelps, Jonathan Ross, Matt Ross, Amir Salek, Emad Samadiani, Chris Severn, Gregory Sizikov, Matthew Snelham, Jed Souter, Dan Steinberg, Andy Swing, Mercedes Tan, Gregory Thorson, Bo Tian, Horia Toma, Erick Tuttle, Vijay Vasudevan, Richard Walter, Walter Wang, Eric Wilcox, and Doe Hyun Yoon. In-Datacenter Performance Analysis of A Tensor Processing Unit. In International Symposium on Computer Architecture, 2017."},{"key":"e_1_3_2_1_34_1","volume-title":"Nataraj Jammalamadaka, Jianyu Huang, Hector Yuen, et al. A Study of BFLOAT16 For Deep Learning Training. arXiv","author":"Kalamkar Dhiraj","year":"2019","unstructured":"Dhiraj Kalamkar, Dheevatsa Mudigere, Naveen Mellempudi, Dipankar Das, Kunal Banerjee, Sasikanth Avancha, Dharma Teja Vooturi, Nataraj Jammalamadaka, Jianyu Huang, Hector Yuen, et al. A Study of BFLOAT16 For Deep Learning Training. arXiv, 2019."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530510"},{"key":"e_1_3_2_1_36_1","volume-title":"Rashmi Vinayak. Boosting The Throughput and Accelerator Utilization of Specialized CNN Inference Beyond Increasing Batch Size. In International Conference on Machine Learning","author":"Kosaian Jack","year":"2021","unstructured":"Jack Kosaian, Amar Phanishayee, Matthai Philipose, Debadeepta Dey, and Rashmi Vinayak. Boosting The Throughput and Accelerator Utilization of Specialized CNN Inference Beyond Increasing Batch Size. In International Conference on Machine Learning, 2021."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACDT.2018.8592948"},{"key":"e_1_3_2_1_38_1","volume-title":"Yoni Choukroun. Low-bit Quantization of Neural Networks for Efficient Inference. In International Conference on Computer Vision Workshops","author":"Kravchik Eli","year":"2019","unstructured":"Eli Kravchik, Fan Yang, Pavel Kisilev, and Yoni Choukroun. Low-bit Quantization of Neural Networks for Efficient Inference. In International Conference on Computer Vision Workshops, 2019."},{"key":"e_1_3_2_1_39_1","volume-title":"Advances in Neural Information Processing Systems","author":"Kuzmin Andrey","year":"2022","unstructured":"Andrey Kuzmin, Mart Van Baalen, Yuwei Ren, Markus Nagel, Jorn Peters, and Tijmen Blankevoort. FP8 Quantization: The Power of the Exponent. In Advances in Neural Information Processing Systems, 2022."},{"key":"e_1_3_2_1_40_1","volume-title":"Hardware Cost of DNN Dataflow: A Data-Centric Approach. In International Symposium on Microarchitecture","author":"Kwon Hyoukjun","year":"2019","unstructured":"Hyoukjun Kwon, Prasanth Chatarasi, Michael Pellauer, Angshuman Parashar, Vivek Sarkar, and Tushar Krishna. Understanding Reuse, Performance, and Hardware Cost of DNN Dataflow: A Data-Centric Approach. In International Symposium on Microarchitecture, 2019."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2019.2914642"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData47090.2019.9006550"},{"key":"e_1_3_2_1_43_1","volume-title":"Alexander C Berg. SSD: Single Shot Multi-box Detector. In European Conference on Computer Vision","author":"Liu Wei","year":"2016","unstructured":"Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C Berg. SSD: Single Shot Multi-box Detector. In European Conference on Computer Vision, 2016."},{"key":"e_1_3_2_1_44_1","volume-title":"Mingzhe Zhang. Distilling Bit-Level Sparsity Parallelism for General Purpose Deep Learning Acceleration. In International Symposium on Microarchitecture","author":"Lu Hang","year":"2021","unstructured":"Hang Lu, Liang Chang, Chenglong Li, Zixuan Zhu, Shengjian Lu, Yanhuan Liu, and Mingzhe Zhang. Distilling Bit-Level Sparsity Parallelism for General Purpose Deep Learning Acceleration. In International Symposium on Microarchitecture, 2021."},{"key":"e_1_3_2_1_45_1","volume-title":"Machine Learning and Systems","author":"Ma Siyuan","year":"2019","unstructured":"Siyuan Ma and Mikhail Belkin. Kernel Machines That Adapt To GPUs for Effective Large Batch Training. In Machine Learning and Systems, 2019."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2678373.2665747"},{"key":"e_1_3_2_1_47_1","volume-title":"Machine Learning and Systems","author":"Mattson Peter","year":"2020","unstructured":"Peter Mattson, Christine Cheng, Gregory Diamos, Cody Coleman, Paulius Micikevicius, David Patterson, Hanlin Tang, Gu-Yeon Wei, Peter Bailis, Victor Bittorf, David Brooks, Dehao Chen, Debo Dutta, Udit Gupta, Kim Hazelwood, Andy Hock, Xinyuan Huang, Daniel Kang, David Kanter, Naveen Kumar, Jeffery Liao, Deepak Narayanan, Tayo Oguntebi, Gennady Pekhimenko, Lillian Pentecost, Vijay Janapa Reddi, Taylor Robie, Tom St John, Carole-Jean Wu, Lingjie Xu, Cliff Young, and Matei Zaharia. MLPerf Training Benchmark. In Machine Learning and Systems, 2020."},{"key":"e_1_3_2_1_48_1","volume-title":"FP8 Formats for Deep Learning. arXiv","author":"Micikevicius Paulius","year":"2022","unstructured":"Paulius Micikevicius, Dusan Stosic, Neil Burgess, Marius Cornea, Pradeep Dubey, Richard Grisenthwaite, Sangwon Ha, Alexander Heinecke, Patrick Judd, John Kamalu, et al. FP8 Formats for Deep Learning. arXiv, 2022."},{"key":"e_1_3_2_1_49_1","volume-title":"James E Smith. A Microarchitecture Implementation Framework for Online Learning with Temporal Neural Networks. In Computer Society Annual Symposium on VLSI","author":"Nair Harideep","year":"2021","unstructured":"Harideep Nair, John Paul Shen, and James E Smith. A Microarchitecture Implementation Framework for Online Learning with Temporal Neural Networks. In Computer Society Annual Symposium on VLSI, 2021."},{"key":"e_1_3_2_1_50_1","volume-title":"Nair and Geoffrey E Hinton. Rectified Linear Units Improve Restricted Boltzmann Machines. In International Conference on International Conference on Machine Learning","author":"Vinod","year":"2010","unstructured":"Vinod Nair and Geoffrey E Hinton. Rectified Linear Units Improve Restricted Boltzmann Machines. In International Conference on International Conference on Machine Learning, 2010."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2018.2822300"},{"key":"e_1_3_2_1_52_1","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson G Azzolini et al. Deep Learning Recommendation Model for Personalization and Recommendation Systems. arXiv 2019."},{"key":"e_1_3_2_1_53_1","volume-title":"8-bit Numerical Formats for Deep Neural Networks. arXiv","author":"Noune Badreddine","year":"2022","unstructured":"Badreddine Noune, Philip Jones, Daniel Justus, Dominic Masters, and Carlo Luschi. 8-bit Numerical Formats for Deep Neural Networks. arXiv, 2022."},{"key":"e_1_3_2_1_54_1","volume-title":"Online","author":"Arm NVIDIA. NVIDIA","year":"2022","unstructured":"NVIDIA. NVIDIA, Arm, and Intel Publish FP8 Specification for Standardization as an Interchange Format for AI. Online, Sep 2022."},{"key":"e_1_3_2_1_55_1","volume-title":"Dally. FineGrained DRAM: Energy-Efficient DRAM for Extreme Bandwidth Systems. In International Symposium on Microarchitecture","author":"O'Connor Mike","year":"2017","unstructured":"Mike O'Connor, Niladrish Chatterjee, Donghyuk Lee, John Wilson, Aditya Agrawal, Stephen W. Keckler, and William J. Dally. FineGrained DRAM: Energy-Efficient DRAM for Extreme Bandwidth Systems. In International Symposium on Microarchitecture, 2017."},{"key":"e_1_3_2_1_56_1","volume-title":"Jian Sun. MegDet: A Large Mini-Batch Object Detector. In Conference on Computer Vision and Pattern Recognition","author":"Peng Chao","year":"2018","unstructured":"Chao Peng, Tete Xiao, Zeming Li, Yuning Jiang, Xiangyu Zhang, Kai Jia, Gang Yu, and Jian Sun. MegDet: A Large Mini-Batch Object Detector. In Conference on Computer Vision and Pattern Recognition, 2018."},{"key":"e_1_3_2_1_57_1","first-page":"2023","author":"Pokora Becky","year":"2023","unstructured":"Becky Pokora. Credit Card Statistics And Trends 2023. Online, Mar 2023.","journal-title":"Credit Card Statistics And Trends"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00016"},{"key":"e_1_3_2_1_59_1","volume-title":"Thomas Brox. U-Net: Convolutional Networks for Biomedical Image Segmentation. In International Conference on Medical image computing and computer-assisted intervention","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. U-Net: Convolutional Networks for Biomedical Image Segmentation. In International Conference on Medical image computing and computer-assisted intervention, 2015."},{"key":"e_1_3_2_1_60_1","volume-title":"Keckler. Simba: Scaling Deep-Learning Inference with Multi-Chip-Module-Based Architecture. In International Symposium on Microarchitecture","author":"Shao Yakun Sophia","year":"2019","unstructured":"Yakun Sophia Shao, Jason Clemons, Rangharajan Venkatesan, Brian Zimmer, Matthew Fojtik, Nan Jiang, Ben Keller, Alicia Klinefelter, Nathaniel Pinckney, Priyanka Raina, Stephen G. Tell, Yanqing Zhang, William J. Dally, Joel Emer, C. Thomas Gray, Brucek Khailany, and Stephen W. Keckler. Simba: Scaling Deep-Learning Inference with Multi-Chip-Module-Based Architecture. In International Symposium on Microarchitecture, 2019."},{"key":"e_1_3_2_1_61_1","volume-title":"Power-Performance Accelerator Simulator Enabling Large Design Space Exploration of Customized Architectures. In International Symposium on Computer Architecture","author":"Shao Yakun Sophia","year":"2014","unstructured":"Yakun Sophia Shao, Brandon Reagen, Gu-Yeon Wei, and David Brooks. Aladdin: A Pre-RTL, Power-Performance Accelerator Simulator Enabling Large Design Space Exploration of Customized Architectures. In International Symposium on Computer Architecture, 2014."},{"key":"e_1_3_2_1_62_1","volume-title":"Deep Learning in Medical Image Analysis. Annual review of biomedical engineering, 19:221--248","author":"Shen Dinggang","year":"2017","unstructured":"Dinggang Shen, Guorong Wu, and Heung-Il Suk. Deep Learning in Medical Image Analysis. Annual review of biomedical engineering, 19:221--248, 2017."},{"key":"e_1_3_2_1_63_1","volume-title":"Ravi Sundaram. Nexus: A GPU Cluster Engine for Accelerating DNN-Based Video Analysis. In Symposium on Operating Systems Principles","author":"Shen Haichen","year":"2019","unstructured":"Haichen Shen, Lequn Chen, Yuchen Jin, Liangyu Zhao, Bingyu Kong, Matthai Philipose, Arvind Krishnamurthy, and Ravi Sundaram. Nexus: A GPU Cluster Engine for Accelerating DNN-Based Video Analysis. In Symposium on Operating Systems Principles, 2019."},{"key":"e_1_3_2_1_64_1","volume-title":"FlexGen: High-Throughput Generative Inference of Large Language Models with A Single GPU. International Conference on Machine Learning","author":"Sheng Ying","year":"2023","unstructured":"Ying Sheng, Lianmin Zheng, Binhang Yuan, Zhuohan Li, Max Ryabinin, Daniel Y Fu, Zhiqiang Xie, Beidi Chen, Clark Barrett, Joseph E Gonzalez, et al. FlexGen: High-Throughput Generative Inference of Large Language Models with A Single GPU. International Conference on Machine Learning, 2023."},{"key":"e_1_3_2_1_65_1","volume-title":"Xiaodong Cui, Wei Zhang, and Kailash Gopalakrishnan. Hybrid 8-Bit Floating Point (HFP8) Training and Inference for Deep Neural Networks. Advances in neural information processing systems, 32","author":"Sun Xiao","year":"2019","unstructured":"Xiao Sun, Jungwook Choi, Chia-Yu Chen, Naigang Wang, Swagath Venkataramani, Vijayalakshmi Viji Srinivasan, Xiaodong Cui, Wei Zhang, and Kailash Gopalakrishnan. Hybrid 8-Bit Floating Point (HFP8) Training and Inference for Deep Neural Networks. Advances in neural information processing systems, 32, 2019."},{"key":"e_1_3_2_1_66_1","volume-title":"Timothy Sherwood. Boosted Race Trees for Low Energy Classification. In International Conference on Architectural Support for Programming Languages and Operating Systems","author":"Tzimpragos Georgios","year":"2019","unstructured":"Georgios Tzimpragos, Advait Madhavan, Dilip Vasudevan, Dmitri Strukov, and Timothy Sherwood. Boosted Race Trees for Low Energy Classification. In International Conference on Architectural Support for Programming Languages and Operating Systems, 2019."},{"key":"e_1_3_2_1_67_1","volume-title":"Timothy Sherwood. Superconducting Computing with Alternating Logic Elements. In International Symposium on Computer Architecture","author":"Tzimpragos Georgios","year":"2021","unstructured":"Georgios Tzimpragos, Jennifer Volk, Alex Wynn, James E. Smith, and Timothy Sherwood. Superconducting Computing with Alternating Logic Elements. In International Symposium on Computer Architecture, 2021."},{"key":"e_1_3_2_1_68_1","volume-title":"International Symposium on Computer Architecture","author":"Li Jingjie","year":"2022","unstructured":"DiWu, Jingjie Li, Zhewen Pan, Younghyun Kim, and Joshua San Miguel. uBrain: A Unary Brain Computer Interface. In International Symposium on Computer Architecture, 2022."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00040"},{"key":"e_1_3_2_1_70_1","volume-title":"International Symposium on High-Performance Computer Architecture","author":"Wu Di","year":"2022","unstructured":"Di Wu and Joshua San Miguel. uSystolic: Byte-Crawling Unary Systolic Array. In International Symposium on High-Performance Computer Architecture, 2022."},{"key":"e_1_3_2_1_71_1","volume-title":"Wu and Joshua San Miguel. Special Session: When Dataflows Converge: Reconfigurable and Approximate Computing for Emerging Neural Networks. In International Conference on Computer Design","author":"Di","year":"2021","unstructured":"Di Wu and Joshua San Miguel. Special Session: When Dataflows Converge: Reconfigurable and Approximate Computing for Emerging Neural Networks. In International Conference on Computer Design, 2021."},{"key":"e_1_3_2_1_72_1","volume-title":"Online","author":"Wu Hao","year":"2019","unstructured":"Hao Wu. Low precision Inference on GPU. Online, Mar 2019."},{"key":"e_1_3_2_1_73_1","volume-title":"Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation. arXiv","author":"Wu Hao","year":"2020","unstructured":"Hao Wu, Patrick Judd, Xiaojie Zhang, Mikhail Isaev, and Paulius Micikevicius. Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation. arXiv, 2020."},{"key":"e_1_3_2_1_74_1","volume-title":"Vivienne Sze. Accelergy: An Architecture-Level Energy Estimation Methodology for Accelerator Designs. In International Conference on Computer-Aided Design","author":"Wu Yannan Nellie","year":"2019","unstructured":"Yannan Nellie Wu, Joel S. Emer, and Vivienne Sze. Accelergy: An Architecture-Level Energy Estimation Methodology for Accelerator Designs. In International Conference on Computer-Aided Design, 2019."},{"key":"e_1_3_2_1_75_1","volume-title":"International Conference on Machine Learning","author":"Yao Zhewei","year":"2021","unstructured":"Zhewei Yao, Zhen Dong, Zhangcheng Zheng, Amir Gholami, Jiali Yu, Eric Tan, Leyuan Wang, Qijing Huang, Yida Wang, Michael Mahoney, and Kurt Keutzer. HAWQ-V3: Dyadic Neural Network Quantization. In International Conference on Machine Learning, 2021."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10070930"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322249"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","location":"La Jolla CA USA","acronym":"ASPLOS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640364","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3620665.3640364","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620665.3640364","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3620665.3640364","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:41Z","timestamp":1750291421000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620665.3640364"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":77,"alternative-id":["10.1145\/3620665.3640364","10.1145\/3620665"],"URL":"https:\/\/doi.org\/10.1145\/3620665.3640364","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}