{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:52:22Z","timestamp":1773193942724,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":107,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,11]],"date-time":"2023-11-11T00:00:00Z","timestamp":1699660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["EPMD-1955246"],"award-info":[{"award-number":["EPMD-1955246"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CNS-2112562"],"award-info":[{"award-number":["CNS-2112562"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000183","name":"Army Research Office","doi-asserted-by":"publisher","award":["W911NF-19-2-0107"],"award-info":[{"award-number":["W911NF-19-2-0107"]}],"id":[{"id":"10.13039\/100000183","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3581784.3607077","type":"proceedings-article","created":{"date-parts":[[2023,10,30]],"date-time":"2023-10-30T20:34:48Z","timestamp":1698698088000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["ReFloat: Low-Cost Floating-Point Processing in ReRAM for Accelerating Iterative Linear Solvers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7450-2842","authenticated-orcid":false,"given":"Linghao","family":"Song","sequence":"first","affiliation":[{"name":"Computer Science, University of California Los Angeles, Los Angeles, California, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7928-8331","authenticated-orcid":false,"given":"Fan","family":"Chen","sequence":"additional","affiliation":[{"name":"Indiana University, Bloomington, Indiana, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3228-6544","authenticated-orcid":false,"given":"Hai","family":"Li","sequence":"additional","affiliation":[{"name":"Duke University, Durham, North Carolina, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1486-8412","authenticated-orcid":false,"given":"Yiran","family":"Chen","sequence":"additional","affiliation":[{"name":"Duke University, Durham, North Carolina, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,11,11]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"[n. d.]. Build and train machine learning models on our new Google Cloud TPUs. https:\/\/www.blog.google\/topics\/google-cloud\/google-cloud-offer-tpusmachine-learning\/."},{"key":"e_1_3_2_2_2_1","unstructured":"[n. d.]. Google supercharges machine learning tasks with TPU custom chip. https:\/\/cloudplatform.googleblog.com\/2016\/05\/Google-supercharges-machine-learning-tasks-with-custom-chip.html."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2016.7727298"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2010.2070830"},{"key":"e_1_3_2_2_5_1","volume-title":"Anne Greenbaum, Sven Hammarling, Alan McKenney, et al.","author":"Anderson Edward","year":"1999","unstructured":"Edward Anderson, Zhaojun Bai, Christian Bischof, L Susan Blackford, James Demmel, Jack Dongarra, Jeremy Du Croz, Anne Greenbaum, Sven Hammarling, Alan McKenney, et al. 1999. LAPACK users' guide. SIAM."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304049"},{"key":"e_1_3_2_2_7_1","volume-title":"Approximation of large-scale dynamical systems","author":"Antoulas Athanasios C","unstructured":"Athanasios C Antoulas. 2005. Approximation of large-scale dynamical systems. Vol. 6. Siam."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1137\/0610013"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1137\/060676489"},{"key":"e_1_3_2_2_10_1","volume-title":"Quality of Numerical Software","author":"Boisvert Ronald F","unstructured":"Ronald F Boisvert, Roldan Pozo, Karin Remington, Richard F Barrett, and Jack J Dongarra. 1997. Matrix market: a web resource for test matrix collections. In Quality of Numerical Software. Springer, 125--137."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446049"},{"key":"e_1_3_2_2_12_1","first-page":"15460","article-title":"NICAM 728: Digital two-channel sound for terrestrial television","volume":"91","author":"Bower AJ","year":"1990","unstructured":"AJ Bower. 1990. NICAM 728: Digital two-channel sound for terrestrial television. STIN 91 (1990), 15460.","journal-title":"STIN"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00044"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1002\/nme.5332"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310400"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001140"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1039\/C3NR05016E"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2018.022071131"},{"key":"e_1_3_2_2_19_1","first-page":"517","article-title":"754-2008 IEEE standard for floating-point arithmetic","volume":"2008","author":"IEEE Standards Committee et al.","year":"2008","unstructured":"IEEE Standards Committee et al. 2008. 754-2008 IEEE standard for floating-point arithmetic. IEEE Computer Society Std 2008 (2008), 517.","journal-title":"IEEE Computer Society Std"},{"key":"e_1_3_2_2_20_1","unstructured":"Altera Corporation. 2005. TFFT\/IFFT Block Floating Point Scaling. https:\/\/www.intel.com\/content\/dam\/www\/programmable\/us\/en\/pdfs\/literature\/an\/an404.pdf."},{"key":"e_1_3_2_2_21_1","volume-title":"Binaryconnect: Training deep neural networks with binary weights during propagations. In Advances in neural information processing systems. 3123--3131.","author":"Courbariaux Matthieu","year":"2015","unstructured":"Matthieu Courbariaux, Yoshua Bengio, and Jean-Pierre David. 2015. Binaryconnect: Training deep neural networks with binary weights during propagations. In Advances in neural information processing systems. 3123--3131."},{"key":"e_1_3_2_2_22_1","volume-title":"Binarized neural networks: Training deep neural networks with weights and activations constrained to+ 1 or-1. arXiv preprint arXiv:1602.02830","author":"Courbariaux Matthieu","year":"2016","unstructured":"Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, and Yoshua Bengio. 2016. Binarized neural networks: Training deep neural networks with weights and activations constrained to+ 1 or-1. arXiv preprint arXiv:1602.02830 (2016)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3331057"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2049662.2049663"},{"key":"e_1_3_2_2_25_1","volume-title":"Applied numerical linear algebra","author":"Demmel James W","unstructured":"James W Demmel. 1997. Applied numerical linear algebra. Vol. 56. Siam."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588195.3592985"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356147"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00067"},{"key":"e_1_3_2_2_29_1","volume-title":"A block floating point implementation for an N-point FFT on the TMS320C55X DSP. Texas Instruments Application Report","author":"Elam David","year":"2003","unstructured":"David Elam and Cesar Lovescu. 2003. A block floating point implementation for an N-point FFT on the TMS320C55X DSP. Texas Instruments Application Report (2003)."},{"key":"e_1_3_2_2_30_1","volume-title":"2011 38th Annual International Symposium on Computer Architecture (ISCA). 365--376","author":"Esmaeilzadeh H.","unstructured":"H. Esmaeilzadeh, E. Blem, R. S. Amant, K. Sankaralingam, and D. Burger. 2011. Dark silicon and the end of multicore scaling. In 2011 38th Annual International Symposium on Computer Architecture (ISCA). 365--376."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2004.26"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00039"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00015"},{"key":"e_1_3_2_2_34_1","volume-title":"An Analog Preconditioner for Solving Linear Systems. In 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 761--774","author":"Feinberg Ben","unstructured":"Ben Feinberg, Ryan Wong, T Patrick Xiao, Christopher H Bennett, Jacob N Rohan, Erik G Boman, Matthew J Marinella, Sapan Agarwal, and Engin Ipek. [n. d.]. An Analog Preconditioner for Solving Linear Systems. In 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 761--774."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441599"},{"key":"e_1_3_2_2_36_1","volume-title":"Computational methods for fluid dynamics","author":"Ferziger Joel H","unstructured":"Joel H Ferziger and Milovan Peri\u0107. 2002. Computational methods for fluid dynamics. Vol. 3. Springer."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.5555\/2650280.2650344"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.915374"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173171"},{"key":"e_1_3_2_2_40_1","volume-title":"Scientific computing: an introduction with parallel computing","author":"Golub Gene H","unstructured":"Gene H Golub and James M Ortega. 2014. Scientific computing: an introduction with parallel computing. Elsevier."},{"key":"e_1_3_2_2_41_1","volume-title":"Evaluating derivatives: principles and techniques of algorithmic differentiation","author":"Griewank Andreas","unstructured":"Andreas Griewank and Andrea Walther. 2008. Evaluating derivatives: principles and techniques of algorithmic differentiation. Vol. 105. Siam."},{"key":"e_1_3_2_2_42_1","volume-title":"International Conference on Machine Learning. 1737--1746","author":"Gupta Suyog","year":"2015","unstructured":"Suyog Gupta, Ankur Agrawal, Kailash Gopalakrishnan, and Pritish Narayanan. 2015. Deep learning with limited numerical precision. In International Conference on Machine Learning. 1737--1746."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"e_1_3_2_2_44_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J Dally. 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_2_45_1","volume-title":"Quantum wells, wires and dots: theoretical and computational physics of semiconductor nanostructures","author":"Harrison Paul","unstructured":"Paul Harrison and Alex Valavanis. 2016. Quantum wells, wires and dots: theoretical and computational physics of semiconductor nanostructures. John Wiley & Sons."},{"key":"e_1_3_2_2_46_1","volume-title":"Methods of conjugate gradients for solving linear systems","author":"Hestenes Magnus Rudolph","unstructured":"Magnus Rudolph Hestenes and Eduard Stiefel. 1952. Methods of conjugate gradients for solving linear systems. Vol. 49. NBS Washington, DC."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897937.2898010"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.5555\/3122009.3242044"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322237"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00011"},{"key":"e_1_3_2_2_51_1","volume-title":"Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference. In 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE, 2704--2713","author":"Jacob Benoit","year":"2018","unstructured":"Benoit Jacob, Skirmantas Kligys, Bo Chen, Menglong Zhu, Matthew Tang, Andrew Howard, Hartwig Adam, and Dmitry Kalenichenko. 2018. Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference. In 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. IEEE, 2704--2713."},{"key":"e_1_3_2_2_52_1","unstructured":"Frank Jensen. 2017. Introduction to computational chemistry. John wiley & sons."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2015.7113355"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304048"},{"key":"e_1_3_2_2_55_1","volume-title":"Dissecting the NVIDIA volta GPU architecture via microbenchmarking. arXiv preprint arXiv:1804.06826","author":"Jia Zhe","year":"2018","unstructured":"Zhe Jia, Marco Maggioni, Benjamin Staiger, and Daniele P Scarpazza. 2018. Dissecting the NVIDIA volta GPU architecture via microbenchmarking. arXiv preprint arXiv:1804.06826 (2018)."},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_2_57_1","unstructured":"Paresh Kharya. [n. d.]. TensorFloat-32 in the A100 GPU Accelerates AI Training HPC up to 20x. https:\/\/blogs.nvidia.com\/blog\/2020\/05\/14\/tensorfloat-32-precision-format\/."},{"key":"e_1_3_2_2_58_1","volume-title":"Compression of deep convolutional neural networks for fast and low power mobile applications. arXiv preprint arXiv:1511.06530","author":"Kim Yong-Deok","year":"2015","unstructured":"Yong-Deok Kim, Eunhyeok Park, Sungjoo Yoo, Taelim Choi, Lu Yang, and Dongjun Shin. 2015. Compression of deep convolutional neural networks for fast and low power mobile applications. arXiv preprint arXiv:1511.06530 (2015)."},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/30.44310"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2017.7870467"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2013.6494986"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872887.2750417"},{"key":"e_1_3_2_2_63_1","volume-title":"Automation & Test in Europe Conference & Exhibition (DATE). IEEE, 815--820","author":"Li Bing","year":"2018","unstructured":"Bing Li, Linghao Song, Fan Chen, Xuehai Qian, Yiran Chen, and Hai Helen Li. 2018. Reram-based accelerator for deep learning. In 2018 Design, Automation & Test in Europe Conference & Exhibition (DATE). IEEE, 815--820."},{"key":"e_1_3_2_2_64_1","volume-title":"Ternary weight networks. arXiv preprint arXiv:1605.04711","author":"Li Fengfu","year":"2016","unstructured":"Fengfu Li, Bo Zhang, and Bin Liu. 2016. Ternary weight networks. arXiv preprint arXiv:1605.04711 (2016)."},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00022"},{"key":"e_1_3_2_2_66_1","volume-title":"Neural networks with few multiplications. arXiv preprint arXiv:1510.03009","author":"Lin Zhouhan","year":"2015","unstructured":"Zhouhan Lin, Matthieu Courbariaux, Roland Memisevic, and Yoshua Bengio. 2015. Neural networks with few multiplications. arXiv preprint arXiv:1510.03009 (2015)."},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2017.75"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3205289.3205313"},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC19947.2020.9062953"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317742"},{"key":"e_1_3_2_2_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751209"},{"key":"e_1_3_2_2_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/321386.321394"},{"key":"e_1_3_2_2_73_1","volume-title":"GPU Technology Conference.","author":"Naumov Maxim","year":"2010","unstructured":"Maxim Naumov, L Chien, Philippe Vandermersch, and Ujval Kapasi. 2010. Cusparse library. In GPU Technology Conference."},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD.2013.6691092"},{"key":"e_1_3_2_2_75_1","volume-title":"Graphics processing units in bioinformatics, computational biology and systems biology. Briefings in bioinformatics 18, 5","author":"Nobile Marco S","year":"2017","unstructured":"Marco S Nobile, Paolo Cazzaniga, Andrea Tangherloni, and Daniela Besozzi. 2017. Graphics processing units in bioinformatics, computational biology and systems biology. Briefings in bioinformatics 18, 5 (2017), 870--885."},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2019.8662307"},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540724"},{"key":"e_1_3_2_2_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370870"},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_32"},{"key":"e_1_3_2_2_80_1","volume-title":"Iterative methods for sparse linear systems","author":"Saad Yousef","unstructured":"Yousef Saad. 2003. Iterative methods for sparse linear systems. Vol. 82. siam."},{"key":"e_1_3_2_2_81_1","volume-title":"ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars. In 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA). IEEE, 14--26","author":"Shafiee Ali","year":"2016","unstructured":"Ali Shafiee, Anirban Nag, Naveen Muralimanohar, Rajeev Balasubramonian, John Paul Strachan, Miao Hu, R Stanley Williams, and Vivek Srikumar. 2016. ISAAC: A Convolutional Neural Network Accelerator with In-Situ Analog Arithmetic in Crossbars. In 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA). IEEE, 14--26."},{"key":"e_1_3_2_2_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3168831"},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/2833179.2833183"},{"key":"e_1_3_2_2_84_1","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304625"},{"key":"e_1_3_2_2_85_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530420"},{"key":"e_1_3_2_2_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490422.3502357"},{"key":"e_1_3_2_2_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543622.3573182"},{"key":"e_1_3_2_2_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.55"},{"key":"e_1_3_2_2_89_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00052"},{"key":"e_1_3_2_2_90_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00062"},{"key":"e_1_3_2_2_91_1","series-title":"SIAM Journal on scientific and Statistical Computing 13, 2","volume-title":"Bi-CGSTAB: A fast and smoothly converging variant of Bi-CG for the solution of nonsymmetric linear systems","author":"Van der Vorst Henk A","year":"1992","unstructured":"Henk A Van der Vorst. 1992. Bi-CGSTAB: A fast and smoothly converging variant of Bi-CG for the solution of nonsymmetric linear systems. SIAM Journal on scientific and Statistical Computing 13, 2 (1992), 631--644."},{"key":"e_1_3_2_2_92_1","doi-asserted-by":"publisher","DOI":"10.1038\/530144a"},{"key":"e_1_3_2_2_93_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC19947.2020.9062979"},{"key":"e_1_3_2_2_94_1","volume-title":"High-Performance Computing on the Intel\u00ae Xeon Phi\u2122","author":"Wang Endong","unstructured":"Endong Wang, Qing Zhang, Bo Shen, Guangyong Zhang, Xiaowei Lu, Qing Wu, and Yajuan Wang. 2014. Intel math kernel library. In High-Performance Computing on the Intel\u00ae Xeon Phi\u2122. Springer, 167--188."},{"key":"e_1_3_2_2_95_1","volume-title":"BFloat16: the secret to high performance on cloud TPUs. Google Cloud Blog","author":"Wang Shibo","year":"2019","unstructured":"Shibo Wang and Pankaj Kanwar. 2019. BFloat16: the secret to high performance on cloud TPUs. Google Cloud Blog (2019)."},{"key":"e_1_3_2_2_96_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC.2018.8465896"},{"key":"e_1_3_2_2_97_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD46524.2019.00074"},{"key":"e_1_3_2_2_98_1","doi-asserted-by":"publisher","DOI":"10.1145\/3400302.3415677"},{"key":"e_1_3_2_2_99_1","volume-title":"Rounding errors in algebraic processes","author":"Wilkinson James Hardy","unstructured":"James Hardy Wilkinson. 1994. Rounding errors in algebraic processes. Courier Corporation."},{"key":"e_1_3_2_2_100_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2012.2190369"},{"key":"e_1_3_2_2_101_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310399"},{"key":"e_1_3_2_2_102_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2019.8662395"},{"key":"e_1_3_2_2_103_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC19947.2020.9063078"},{"key":"e_1_3_2_2_104_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322271"},{"key":"e_1_3_2_2_105_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018755"},{"key":"e_1_3_2_2_106_1","volume-title":"Incremental network quantization: Towards lossless cnns with low-precision weights. arXiv preprint arXiv:1702.03044","author":"Zhou Aojun","year":"2017","unstructured":"Aojun Zhou, Anbang Yao, Yiwen Guo, Lin Xu, and Yurong Chen. 2017. Incremental network quantization: Towards lossless cnns with low-precision weights. arXiv preprint arXiv:1702.03044 (2017)."},{"key":"e_1_3_2_2_107_1","volume-title":"Dorefa-net: Training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160","author":"Zhou Shuchang","year":"2016","unstructured":"Shuchang Zhou, Yuxin Wu, Zekun Ni, Xinyu Zhou, He Wen, and Yuheng Zou. 2016. Dorefa-net: Training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160 (2016)."}],"event":{"name":"SC '23: International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Denver CO USA","acronym":"SC '23","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","IEEE CS"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607077","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3581784.3607077","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581784.3607077","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581784.3607077","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:23Z","timestamp":1750178183000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581784.3607077"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,11]]},"references-count":107,"alternative-id":["10.1145\/3581784.3607077","10.1145\/3581784"],"URL":"https:\/\/doi.org\/10.1145\/3581784.3607077","relation":{},"subject":[],"published":{"date-parts":[[2023,11,11]]},"assertion":[{"value":"2023-11-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}