{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T12:00:39Z","timestamp":1774353639823,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T00:00:00Z","timestamp":1656374400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20226"],"award-info":[{"award-number":["U20A20226"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R&D Program of China","award":["2021ZD0110104"],"award-info":[{"award-number":["2021ZD0110104"]}]},{"name":"Beijing Natural Science Foundation","award":["4202031"],"award-info":[{"award-number":["4202031"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,28]]},"DOI":"10.1145\/3524059.3532377","type":"proceedings-article","created":{"date-parts":[[2022,6,16]],"date-time":"2022-06-16T16:13:11Z","timestamp":1655395991000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Efficiently emulating high-bitwidth computation with low-bitwidth hardware"],"prefix":"10.1145","author":[{"given":"Zixuan","family":"Ma","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haojie","family":"Wang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guanyu","family":"Feng","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Xie","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiaao","family":"He","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shengqi","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jidong","family":"Zhai","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,6,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/IEEESTD.2008.4610935"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1080\/00031305.1992.10475879","article-title":"An introduction to kernel and nearest-neighbor nonparametric regression","volume":"46","author":"Altman Naomi S","year":"1992","unstructured":"Naomi S Altman . 1992 . An introduction to kernel and nearest-neighbor nonparametric regression . The American Statistician 46 , 3 (1992), 175 -- 185 . Naomi S Altman. 1992. An introduction to kernel and nearest-neighbor nonparametric regression. The American Statistician 46, 3 (1992), 175--185.","journal-title":"The American Statistician"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/567806.567807"},{"key":"e_1_3_2_1_4_1","volume-title":"cudnn: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759","author":"Chetlur Sharan","year":"2014","unstructured":"Sharan Chetlur , Cliff Woolley , Philippe Vandermersch , Jonathan Cohen , John Tran , Bryan Catanzaro , and Evan Shelhamer . 2014. cudnn: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759 ( 2014 ). Sharan Chetlur, Cliff Woolley, Philippe Vandermersch, Jonathan Cohen, John Tran, Bryan Catanzaro, and Evan Shelhamer. 2014. cudnn: Efficient primitives for deep learning. arXiv preprint arXiv:1410.0759 (2014)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3009837.3009846"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433768"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01397083"},{"key":"e_1_3_2_1_8_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2018 . Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018). Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_9_1","first-page":"89","article-title":"TOP500 supercomputer sites","volume":"13","author":"Dongarra Jack J","year":"1997","unstructured":"Jack J Dongarra , Hans W Meuer , Erich Strohmaier , 1997 . TOP500 supercomputer sites . Supercomputer 13 (1997), 89 -- 111 . Jack J Dongarra, Hans W Meuer, Erich Strohmaier, et al. 1997. TOP500 supercomputer sites. Supercomputer 13 (1997), 89--111.","journal-title":"Supercomputer"},{"key":"e_1_3_2_1_10_1","unstructured":"Liwen Fan Ruixin Wang Kuan Fang and Xian Sun. 2019. cuBERT. https:\/\/github.com\/zhihu\/cuBERT. Liwen Fan Ruixin Wang Kuan Fang and Xian Sun. 2019. cuBERT. https:\/\/github.com\/zhihu\/cuBERT."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441599"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476157"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Evelyn Fix. 1951. Discriminatory analysis: nonparametric discrimination consistency properties. USAF School of Aviation Medicine. Evelyn Fix. 1951. Discriminatory analysis: nonparametric discrimination consistency properties. USAF School of Aviation Medicine.","DOI":"10.1037\/e471672008-001"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2008.4563100"},{"key":"e_1_3_2_1_15_1","unstructured":"Google. 2020. Advanced neural network processing for low-power devices. https:\/\/coral.ai\/technology Google. 2020. Advanced neural network processing for low-power devices. https:\/\/coral.ai\/technology"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3314221.3314597"},{"key":"e_1_3_2_1_17_1","unstructured":"Huawei. 2022. Ascend to Pervasive Intelligence. https:\/\/e.huawei.com\/en\/products\/servers\/ascend Huawei. 2022. Ascend to Pervasive Intelligence. https:\/\/e.huawei.com\/en\/products\/servers\/ascend"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/SiPS.2014.6986082"},{"key":"e_1_3_2_1_19_1","volume-title":"A user's guide to principal components","author":"Jackson J Edward","unstructured":"J Edward Jackson . 2005. A user's guide to principal components . Vol. 587 . John Wiley & Sons . J Edward Jackson. 2005. A user's guide to principal components. Vol. 587. John Wiley & Sons."},{"key":"e_1_3_2_1_20_1","volume-title":"Dissecting the graphcore ipu architecture via microbenchmarking. arXiv preprint arXiv:1912.03413","author":"Jia Zhe","year":"2019","unstructured":"Zhe Jia , Blake Tillman , Marco Maggioni , and Daniele Paolo Scarpazza . 2019. Dissecting the graphcore ipu architecture via microbenchmarking. arXiv preprint arXiv:1912.03413 ( 2019 ). Zhe Jia, Blake Tillman, Marco Maggioni, and Daniele Paolo Scarpazza. 2019. Dissecting the graphcore ipu architecture via microbenchmarking. arXiv preprint arXiv:1912.03413 (2019)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_22_1","volume-title":"Raquel Urtasun, and Andreas Moshovos.","author":"Judd Patrick","year":"2015","unstructured":"Patrick Judd , Jorge Albericio , Tayler Hetherington , Tor Aamodt , Natalie Enright Jerger , Raquel Urtasun, and Andreas Moshovos. 2015 . Reduced-precision strategies for bounded memory in deep neural nets. arXiv preprint arXiv:1511.05236 (2015). Patrick Judd, Jorge Albericio, Tayler Hetherington, Tor Aamodt, Natalie Enright Jerger, Raquel Urtasun, and Andreas Moshovos. 2015. Reduced-precision strategies for bounded memory in deep neural nets. arXiv preprint arXiv:1511.05236 (2015)."},{"key":"e_1_3_2_1_23_1","volume-title":"HPC up to 20x. the NVIDIA Blog","author":"Kharya Paresh","year":"2020","unstructured":"Paresh Kharya . 2020. TensorFloat-32 in the A100 GPU Accelerates AI Training , HPC up to 20x. the NVIDIA Blog ( 2020 ). Paresh Kharya. 2020. TensorFloat-32 in the A100 GPU Accelerates AI Training, HPC up to 20x. the NVIDIA Blog (2020)."},{"key":"e_1_3_2_1_24_1","volume-title":"Deep convolutional neural network inference with floating-point weights and fixed-point activations. arXiv preprint arXiv:1703.03073","author":"Lai Liangzhen","year":"2017","unstructured":"Liangzhen Lai , Naveen Suda , and Vikas Chandra . 2017. Deep convolutional neural network inference with floating-point weights and fixed-point activations. arXiv preprint arXiv:1703.03073 ( 2017 ). Liangzhen Lai, Naveen Suda, and Vikas Chandra. 2017. Deep convolutional neural network inference with floating-point weights and fixed-point activations. arXiv preprint arXiv:1703.03073 (2017)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00071"},{"key":"e_1_3_2_1_26_1","volume-title":"International conference on machine learning. PMLR, 2849--2858","author":"Lin Darryl","year":"2016","unstructured":"Darryl Lin , Sachin Talathi , and Sreekanth Annapureddy . 2016 . Fixed point quantization of deep convolutional networks . In International conference on machine learning. PMLR, 2849--2858 . Darryl Lin, Sachin Talathi, and Sreekanth Annapureddy. 2016. Fixed point quantization of deep convolutional networks. In International conference on machine learning. PMLR, 2849--2858."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/355958.355960"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the fifth Berkeley symposium on mathematical statistics and probability","volume":"1","author":"James","unstructured":"James MacQueen et al. 1967. Some methods for classification and analysis of multivariate observations . In Proceedings of the fifth Berkeley symposium on mathematical statistics and probability , Vol. 1 . Oakland, CA, USA, 281--297. James MacQueen et al. 1967. Some methods for classification and analysis of multivariate observations. In Proceedings of the fifth Berkeley symposium on mathematical statistics and probability, Vol. 1. Oakland, CA, USA, 281--297."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00091"},{"key":"e_1_3_2_1_30_1","unstructured":"NVIDIA. [n.d.]. cuBLAS. https:\/\/developer.nvidia.com\/cublas. NVIDIA. [n.d.]. cuBLAS. https:\/\/developer.nvidia.com\/cublas."},{"key":"e_1_3_2_1_31_1","unstructured":"NVIDIA. 2013. NVIDIA\/kmeans. https:\/\/github.com\/NVIDIA\/kmeans. NVIDIA. 2013. NVIDIA\/kmeans. https:\/\/github.com\/NVIDIA\/kmeans."},{"key":"e_1_3_2_1_32_1","volume-title":"NVIDIA A100 Tensor Core GPU Architecture. UNPRECEDENTED ACCELERATION AT EVERY SCALE. Version v1.0. NVIDIA","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. 2020. NVIDIA A100 Tensor Core GPU Architecture. UNPRECEDENTED ACCELERATION AT EVERY SCALE. Version v1.0. NVIDIA ( 2020 ). NVIDIA. 2020. NVIDIA A100 Tensor Core GPU Architecture. UNPRECEDENTED ACCELERATION AT EVERY SCALE. Version v1.0. NVIDIA (2020)."},{"key":"e_1_3_2_1_33_1","volume-title":"V100 GPU architecture. the world's most advanced data center GPU. Version WP-08608-001_v1.1. NVIDIA. Aug","author":"Tesla NVIDIA.","year":"2017","unstructured":"Tesla NVIDIA. 2017. V100 GPU architecture. the world's most advanced data center GPU. Version WP-08608-001_v1.1. NVIDIA. Aug ( 2017 ). Tesla NVIDIA. 2017. V100 GPU architecture. the world's most advanced data center GPU. Version WP-08608-001_v1.1. NVIDIA. Aug (2017)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.26599\/BDMA.2021.9020004"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230733"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/1179622.1179682"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00055"},{"key":"e_1_3_2_1_38_1","volume-title":"BFloat16: the secret to high performance on cloud TPUs. Google Cloud Blog","author":"Wang Shibo","year":"2019","unstructured":"Shibo Wang and Pankaj Kanwar . 2019. BFloat16: the secret to high performance on cloud TPUs. Google Cloud Blog ( 2019 ). Shibo Wang and Pankaj Kanwar. 2019. BFloat16: the secret to high performance on cloud TPUs. Google Cloud Blog (2019)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460357"}],"event":{"name":"ICS '22: 2022 International Conference on Supercomputing","location":"Virtual Event","acronym":"ICS '22","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 36th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3524059.3532377","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3524059.3532377","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:30:37Z","timestamp":1750188637000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3524059.3532377"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,28]]},"references-count":39,"alternative-id":["10.1145\/3524059.3532377","10.1145\/3524059"],"URL":"https:\/\/doi.org\/10.1145\/3524059.3532377","relation":{},"subject":[],"published":{"date-parts":[[2022,6,28]]},"assertion":[{"value":"2022-06-28","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}