{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T22:38:38Z","timestamp":1768516718480,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,18]]},"DOI":"10.1145\/3466752.3480106","type":"proceedings-article","created":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T19:16:55Z","timestamp":1634498215000},"page":"857-869","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["FPRaker: A Processing Element For Accelerating Neural Network Training"],"prefix":"10.1145","author":[{"given":"Omar Mohamed","family":"Awad","sequence":"first","affiliation":[{"name":"University of Toronto\/Huawei"}]},{"given":"Mostafa","family":"Mahmoud","sequence":"additional","affiliation":[{"name":"Toronto"}]},{"given":"Isak","family":"Edo","sequence":"additional","affiliation":[{"name":"University of Toronto\/Arm"}]},{"given":"Ali Hadi","family":"Zadeh","sequence":"additional","affiliation":[{"name":"University of Toronto, Canada"}]},{"given":"Ciaran","family":"Bannon","sequence":"additional","affiliation":[{"name":"University of Toronto"}]},{"given":"Anand","family":"Jayarajan","sequence":"additional","affiliation":[{"name":"University of Toronto, Canada"}]},{"given":"Gennady","family":"Pekhimenko","sequence":"additional","affiliation":[{"name":"University of Toronto\/Vector Institute, Canada"}]},{"given":"Andreas","family":"Moshovos","sequence":"additional","affiliation":[{"name":"University of Toronto\/Vector Institute, Canada"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. ANSI\/IEEE Std 754-2019 http:\/\/754r.ucbtest.org. https:\/\/754r.ucbtest.org\/background\/  [n. d.]. ANSI\/IEEE Std 754-2019 http:\/\/754r.ucbtest.org. https:\/\/754r.ucbtest.org\/background\/"},{"key":"e_1_3_2_1_2_1","volume-title":"Bit-pragmatic Deep Neural Network Computing. In Intl\u2019 Symp. on Microarchitecture.","author":"Albericio Jorge","year":"2017","unstructured":"Jorge Albericio , Alberto Delm\u00e1s , Patrick Judd , Sayeh Sharify , Gerard O\u2019Leary , Roman Genov , and Andreas Moshovos . 2017 . Bit-pragmatic Deep Neural Network Computing. In Intl\u2019 Symp. on Microarchitecture. Jorge Albericio, Alberto Delm\u00e1s, Patrick Judd, Sayeh Sharify, Gerard O\u2019Leary, Roman Genov, and Andreas Moshovos. 2017. Bit-pragmatic Deep Neural Network Computing. In Intl\u2019 Symp. on Microarchitecture."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001138"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1075"},{"key":"e_1_3_2_1_5_1","unstructured":"Cadence. [n. d.]. Innovus Implementation System. https:\/\/www.cadence.com\/content\/cadence-www\/global\/en_US\/home\/tools\/digital-design-and-signoff\/hierarchical-design-and-floorplanning\/innovus-implementation-system.html.  Cadence. [n. d.]. Innovus Implementation System. https:\/\/www.cadence.com\/content\/cadence-www\/global\/en_US\/home\/tools\/digital-design-and-signoff\/hierarchical-design-and-floorplanning\/innovus-implementation-system.html."},{"key":"e_1_3_2_1_6_1","volume-title":"Munich","author":"Castro M.","year":"2018","unstructured":"Francisco\u00a0 M. Castro , Manuel\u00a0 J. Mar\u00edn-Jim\u00e9nez , Nicol\u00e1s Guil , Cordelia Schmid , and Karteek Alahari . 2018 . End-to-End Incremental Learning. In Computer Vision - ECCV 2018 - 15th European Conference , Munich , Germany, September 8-14, 2018, Proceedings, Part XII. 241\u2013257. https:\/\/doi.org\/10.1007\/978-3-030-01258-8_15 10.1007\/978-3-030-01258-8_15 Francisco\u00a0M. Castro, Manuel\u00a0J. Mar\u00edn-Jim\u00e9nez, Nicol\u00e1s Guil, Cordelia Schmid, and Karteek Alahari. 2018. End-to-End Incremental Learning. In Computer Vision - ECCV 2018 - 15th European Conference, Munich, Germany, September 8-14, 2018, Proceedings, Part XII. 241\u2013257. https:\/\/doi.org\/10.1007\/978-3-030-01258-8_15"},{"key":"e_1_3_2_1_7_1","volume-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing. IEEE, 483\u2013486","author":"Chau P","unstructured":"P Chau , K. Chew , and W. Ki . 1987. A Bit-Serial Floating-Point Complex Multiplier-Accumulator for Fault-Tolerant Digital Signal Processing Arrays . In IEEE International Conference on Acoustics, Speech, and Signal Processing. IEEE, 483\u2013486 . P Chau, K. Chew, and W. Ki. 1987. A Bit-Serial Floating-Point Complex Multiplier-Accumulator for Fault-Tolerant Digital Signal Processing Arrays. In IEEE International Conference on Acoustics, Speech, and Signal Processing. IEEE, 483\u2013486."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_10_1","volume-title":"Vijayalakshmi Srinivasan, and Kailash Gopalakrishnan.","author":"Choi Jungwook","year":"2018","unstructured":"Jungwook Choi , Zhuo Wang , Swagath Venkataramani , Pierce I-Jen Chuang , Vijayalakshmi Srinivasan, and Kailash Gopalakrishnan. 2018 . Pact : Parameterized clipping activation for quantized neural networks. arXiv preprint arXiv:1805.06085(2018). Jungwook Choi, Zhuo Wang, Swagath Venkataramani, Pierce I-Jen Chuang, Vijayalakshmi Srinivasan, and Kailash Gopalakrishnan. 2018. Pact: Parameterized clipping activation for quantized neural networks. arXiv preprint arXiv:1805.06085(2018)."},{"key":"e_1_3_2_1_11_1","volume-title":"6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. https:\/\/openreview.net\/forum?id=H135uzZ0-","author":"Das Dipankar","year":"2018","unstructured":"Dipankar Das , Naveen Mellempudi , Dheevatsa Mudigere , Dhiraj\u00a0 D. Kalamkar , Sasikanth Avancha , Kunal Banerjee , Srinivas Sridharan , Karthik Vaidyanathan , Bharat Kaul , Evangelos Georganas , Alexander Heinecke , Pradeep Dubey , Jes\u00fas Corbal , Nikita Shustrov , Roman Dubtsov , Evarist Fomenko , and Vadim\u00a0 O. Pirogov . 2018 . Mixed Precision Training of Convolutional Neural Networks using Integer Operations . In 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. https:\/\/openreview.net\/forum?id=H135uzZ0- Dipankar Das, Naveen Mellempudi, Dheevatsa Mudigere, Dhiraj\u00a0D. Kalamkar, Sasikanth Avancha, Kunal Banerjee, Srinivas Sridharan, Karthik Vaidyanathan, Bharat Kaul, Evangelos Georganas, Alexander Heinecke, Pradeep Dubey, Jes\u00fas Corbal, Nikita Shustrov, Roman Dubtsov, Evarist Fomenko, and Vadim\u00a0O. Pirogov. 2018. Mixed Precision Training of Convolutional Neural Networks using Integer Operations. In 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. https:\/\/openreview.net\/forum?id=H135uzZ0-"},{"key":"e_1_3_2_1_12_1","unstructured":"Christopher De\u00a0Sa Megan Leszczynski Jian Zhang Alana Marzoev Christopher\u00a0R Aberger Kunle Olukotun and Christopher R\u00e9. 2018. High-accuracy low-precision training. arXiv preprint arXiv:1803.03383(2018).  Christopher De\u00a0Sa Megan Leszczynski Jian Zhang Alana Marzoev Christopher\u00a0R Aberger Kunle Olukotun and Christopher R\u00e9. 2018. High-accuracy low-precision training. arXiv preprint arXiv:1803.03383(2018)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304041"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/3326943.3326985"},{"key":"e_1_3_2_1_15_1","volume-title":"Neural Cache: Bit-Serial In-Cache Acceleration of Deep Neural Networks. In 45th ACM\/IEEE Annual Intl\u2019 Symp. on Computer Architecture, ISCA 2018","author":"Eckert Charles","year":"2018","unstructured":"Charles Eckert , Xiaowei Wang , Jingcheng Wang , Arun Subramaniyan , Ravi\u00a0 R. Iyer , Dennis Sylvester , David\u00a0 T. Blaauw , and Reetuparna Das . 2018 . Neural Cache: Bit-Serial In-Cache Acceleration of Deep Neural Networks. In 45th ACM\/IEEE Annual Intl\u2019 Symp. on Computer Architecture, ISCA 2018 , Los Angeles, CA, USA , June 1-6, 2018. 383\u2013396. https:\/\/doi.org\/10.1109\/ISCA.2018.00040 10.1109\/ISCA.2018.00040 Charles Eckert, Xiaowei Wang, Jingcheng Wang, Arun Subramaniyan, Ravi\u00a0R. Iyer, Dennis Sylvester, David\u00a0T. Blaauw, and Reetuparna Das. 2018. Neural Cache: Bit-Serial In-Cache Acceleration of Deep Neural Networks. In 45th ACM\/IEEE Annual Intl\u2019 Symp. on Computer Architecture, ISCA 2018, Los Angeles, CA, USA, June 1-6, 2018. 383\u2013396. https:\/\/doi.org\/10.1109\/ISCA.2018.00040"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Desmond Elliott Stella Frank Khalil Sima\u2019an and Lucia Specia. 2016. Multi30K: Multilingual English-German Image Descriptions. arxiv:cs.CL\/1605.00459  Desmond Elliott Stella Frank Khalil Sima\u2019an and Lucia Specia. 2016. Multi30K: Multilingual English-German Image Descriptions. arxiv:cs.CL\/1605.00459","DOI":"10.18653\/v1\/W16-3210"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2010.121"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/103162.103163"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358291"},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Machine Learning. 1737\u20131746","author":"Gupta Suyog","year":"2015","unstructured":"Suyog Gupta , Ankur Agrawal , Kailash Gopalakrishnan , and Pritish Narayanan . 2015 . Deep learning with limited numerical precision . In International Conference on Machine Learning. 1737\u20131746 . Suyog Gupta, Ankur Agrawal, Kailash Gopalakrishnan, and Pritish Narayanan. 2015. Deep learning with limited numerical precision. In International Conference on Machine Learning. 1737\u20131746."},{"key":"e_1_3_2_1_21_1","volume-title":"MASR: A Modular Accelerator for Sparse RNNs. In 28th International Conference on Parallel Architectures and Compilation Techniques, PACT 2019","author":"Gupta Udit","year":"2019","unstructured":"Udit Gupta , Brandon Reagen , Lillian Pentecost , Marco Donato , Thierry Tambe , Alexander\u00a0 M. Rush , Gu-Yeon Wei , and David Brooks . 2019 . MASR: A Modular Accelerator for Sparse RNNs. In 28th International Conference on Parallel Architectures and Compilation Techniques, PACT 2019 , Seattle, WA, USA , September 23-26, 2019. IEEE, 1\u201314. https:\/\/doi.org\/10.1109\/PACT.2019.00009 10.1109\/PACT.2019.00009 Udit Gupta, Brandon Reagen, Lillian Pentecost, Marco Donato, Thierry Tambe, Alexander\u00a0M. Rush, Gu-Yeon Wei, and David Brooks. 2019. MASR: A Modular Accelerator for Sparse RNNs. In 28th International Conference on Parallel Architectures and Compilation Techniques, PACT 2019, Seattle, WA, USA, September 23-26, 2019. IEEE, 1\u201314. https:\/\/doi.org\/10.1109\/PACT.2019.00009"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"e_1_3_2_1_23_1","article-title":"The MovieLens Datasets","volume":"5","author":"Harper Maxwell","year":"2015","unstructured":"F.\u00a0 Maxwell Harper and Joseph\u00a0 A. Konstan . 2015 . The MovieLens Datasets : History and Context. ACM Trans. Interact. Intell. Syst. 5 , 4, Article Article 19 (Dec. 2015), 19\u00a0pages. https:\/\/doi.org\/10.1145\/2827872 10.1145\/2827872 F.\u00a0Maxwell Harper and Joseph\u00a0A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Trans. Interact. Intell. Syst. 5, 4, Article Article 19 (Dec. 2015), 19\u00a0pages. https:\/\/doi.org\/10.1145\/2827872","journal-title":"History and Context. ACM Trans. Interact. Intell. Syst."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2019.00019"},{"key":"e_1_3_2_1_25_1","unstructured":"HewlettPackard. [n. d.]. CACTI. https:\/\/github.com\/HewlettPackard\/cacti.  HewlettPackard. [n. d.]. CACTI. https:\/\/github.com\/HewlettPackard\/cacti."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/4.62143"},{"key":"e_1_3_2_1_27_1","volume-title":"Costas Bekas, and Dimitrios\u00a0S. Nikolopoulos.","author":"Istrate Roxana","year":"2018","unstructured":"Roxana Istrate , Adelmo Cristiano\u00a0Innocenza Malossi , Costas Bekas, and Dimitrios\u00a0S. Nikolopoulos. 2018 . Incremental Training of Deep Convolutional Neural Networks. CoRR abs\/1803.10232(2018). arxiv:1803.10232http:\/\/arxiv.org\/abs\/1803.10232 Roxana Istrate, Adelmo Cristiano\u00a0Innocenza Malossi, Costas Bekas, and Dimitrios\u00a0S. Nikolopoulos. 2018. Incremental Training of Deep Convolutional Neural Networks. CoRR abs\/1803.10232(2018). arxiv:1803.10232http:\/\/arxiv.org\/abs\/1803.10232"},{"key":"e_1_3_2_1_28_1","volume-title":"Stripes: Bit-serial Deep Neural Network Computing. In Intl\u2019 Symp. on Microarchitecture.","author":"Judd Patrick","year":"2016","unstructured":"Patrick Judd , Jorge Albericio , Tayler Hetherington , Tor Aamodt , and Andreas Moshovos . 2016 . Stripes: Bit-serial Deep Neural Network Computing. In Intl\u2019 Symp. on Microarchitecture. Patrick Judd, Jorge Albericio, Tayler Hetherington, Tor Aamodt, and Andreas Moshovos. 2016. Stripes: Bit-serial Deep Neural Network Computing. In Intl\u2019 Symp. on Microarchitecture."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2017.2741463"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems(NIPS\u201917)","author":"K\u00f6ster Urs","year":"2017","unstructured":"Urs K\u00f6ster , Tristan\u00a0 J. Webb , Xin Wang , Marcel Nassar , Arjun\u00a0 K. Bansal , William\u00a0 H. Constable , O\u011fuz\u00a0 H. Elibol , Scott Gray , Stewart Hall , Luke Hornof , Amir Khosrowshahi , Carey Kloss , Ruby\u00a0 J. Pai , and Naveen Rao . 2017 . Flexpoint: An Adaptive Numerical Format for Efficient Training of Deep Neural Networks . In Proceedings of the 31st International Conference on Neural Information Processing Systems(NIPS\u201917) . Curran Associates Inc., USA, 1740\u20131750. http:\/\/dl.acm.org\/citation.cfm?id=3294771.3294937 Urs K\u00f6ster, Tristan\u00a0J. Webb, Xin Wang, Marcel Nassar, Arjun\u00a0K. Bansal, William\u00a0H. Constable, O\u011fuz\u00a0H. Elibol, Scott Gray, Stewart Hall, Luke Hornof, Amir Khosrowshahi, Carey Kloss, Ruby\u00a0J. Pai, and Naveen Rao. 2017. Flexpoint: An Adaptive Numerical Format for Efficient Training of Deep Neural Networks. In Proceedings of the 31st International Conference on Neural Information Processing Systems(NIPS\u201917). Curran Associates Inc., USA, 1740\u20131750. http:\/\/dl.acm.org\/citation.cfm?id=3294771.3294937"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304028"},{"key":"e_1_3_2_1_32_1","volume-title":"Logic Synthesis Using Synopsys","author":"Kurup Pran","unstructured":"Pran Kurup and Taher Abbasi . 2011. Logic Synthesis Using Synopsys ( 2 nd ed.). Springer Publishing Company, Inc orporated. Pran Kurup and Taher Abbasi. 2011. Logic Synthesis Using Synopsys(2nd ed.). Springer Publishing Company, Incorporated.","edition":"2"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358295"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2018.2865489"},{"key":"e_1_3_2_1_35_1","unstructured":"Tsung-Yi Lin Michael Maire Serge Belongie Lubomir Bourdev Ross Girshick James Hays Pietro Perona Deva Ramanan C.\u00a0Lawrence Zitnick and Piotr Doll\u00e1r. 2014. Microsoft COCO: Common Objects in Context. arxiv:cs.CV\/1405.0312  Tsung-Yi Lin Michael Maire Serge Belongie Lubomir Bourdev Ross Girshick James Hays Pietro Perona Deva Ramanan C.\u00a0Lawrence Zitnick and Piotr Doll\u00e1r. 2014. Microsoft COCO: Common Objects in Context. arxiv:cs.CV\/1405.0312"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001179"},{"key":"e_1_3_2_1_37_1","volume-title":"6th International Conference on Learning Representations","author":"Micikevicius Paulius","year":"2018","unstructured":"Paulius Micikevicius , Sharan Narang , Jonah Alben , Gregory\u00a0 F. Diamos , Erich Elsen , David Garc\u00eda , Boris Ginsburg , Michael Houston , Oleksii Kuchaiev , Ganesh Venkatesh , and Hao Wu. 2018. Mixed Precision Training . In 6th International Conference on Learning Representations , 2018 , Vancouver, BC , Canada, April 30 - May 3, 2018, Conference Track Proceedings . https:\/\/openreview.net\/forum?id=r1gs9JgRZ Paulius Micikevicius, Sharan Narang, Jonah Alben, Gregory\u00a0F. Diamos, Erich Elsen, David Garc\u00eda, Boris Ginsburg, Michael Houston, Oleksii Kuchaiev, Ganesh Venkatesh, and Hao Wu. 2018. Mixed Precision Training. In 6th International Conference on Learning Representations, 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings. https:\/\/openreview.net\/forum?id=r1gs9JgRZ"},{"key":"e_1_3_2_1_38_1","unstructured":"Inc. Micron\u00a0Technology. [n. d.]. DDR4 Power Calculator 4.0. https:\/\/www.micron.com\/~\/media\/documents\/products\/power-calculator\/ddr4_power_calc.xlsm.  Inc. Micron\u00a0Technology. [n. d.]. DDR4 Power Calculator 4.0. https:\/\/www.micron.com\/~\/media\/documents\/products\/power-calculator\/ddr4_power_calc.xlsm."},{"key":"e_1_3_2_1_39_1","volume-title":"International Conference on Machine Learning. 4646\u20134655","author":"Mostafa Hesham","year":"2019","unstructured":"Hesham Mostafa and Xin Wang . 2019 . Parameter efficient training of deep convolutional neural networks by dynamic sparse reparameterization . In International Conference on Machine Learning. 4646\u20134655 . Hesham Mostafa and Xin Wang. 2019. Parameter efficient training of deep convolutional neural networks by dynamic sparse reparameterization. In International Conference on Machine Learning. 4646\u20134655."},{"key":"e_1_3_2_1_40_1","unstructured":"NVIDIA. [n. d.]. Training With Mixed Precision. https:\/\/docs.nvidia.com\/deeplearning\/sdk\/mixed-precision-training\/index.html.  NVIDIA. [n. d.]. Training With Mixed Precision. https:\/\/docs.nvidia.com\/deeplearning\/sdk\/mixed-precision-training\/index.html."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370870"},{"key":"e_1_3_2_1_43_1","unstructured":"Intel\u00a0AI PlaidML. 2017. PlaidML. https:\/\/vertexai-plaidml.readthedocs-hosted.com\/en\/latest\/index.html  Intel\u00a0AI PlaidML. 2017. PlaidML. https:\/\/vertexai-plaidml.readthedocs-hosted.com\/en\/latest\/index.html"},{"key":"e_1_3_2_1_44_1","volume-title":"SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. In IEEE International Symposium on High Performance Computer Architecture, HPCA 2020","author":"Qin Eric","year":"2020","unstructured":"Eric Qin , Ananda Samajdar , Hyoukjun Kwon , Vineet Nadella , Sudarshan Srinivasan , Dipankar The , Bharat Kaul , and Tushar Krishna . 2020 . SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. In IEEE International Symposium on High Performance Computer Architecture, HPCA 2020 , San Diego, CA, USA , February 22-26, 2020. IEEE, 58\u201370. https:\/\/doi.org\/10.1109 \/ HPCA47549.2020.00015 Eric Qin, Ananda Samajdar, Hyoukjun Kwon, Vineet Nadella, Sudarshan Srinivasan, Dipankar The, Bharat Kaul, and Tushar Krishna. 2020. SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. In IEEE International Symposium on High Performance Computer Architecture, HPCA 2020, San Diego, CA, USA, February 22-26, 2020. IEEE, 58\u201370. https:\/\/doi.org\/10.1109 \/ HPCA47549.2020.00015"},{"key":"e_1_3_2_1_45_1","volume-title":"ImageNet Large Scale Visual Recognition Challenge. CoRR abs\/1409.0575 (Sept","author":"Russakovsky Olga","year":"2014","unstructured":"Olga Russakovsky , Jia Deng , Hao Su , Jonathan Krause , Sanjeev Satheesh , Sean Ma , Zhiheng Huang , Andrej Karpathy , Aditya Khosla , Michael Bernstein , Alexander\u00a0 C. Berg , and Li Fei-Fei . 2014. ImageNet Large Scale Visual Recognition Challenge. CoRR abs\/1409.0575 (Sept . 2014 ). Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein, Alexander\u00a0C. Berg, and Li Fei-Fei. 2014. ImageNet Large Scale Visual Recognition Challenge. CoRR abs\/1409.0575 (Sept. 2014)."},{"key":"e_1_3_2_1_46_1","unstructured":"Charbel Sakr Naigang Wang Chia-Yu Chen Jungwook Choi Ankur Agrawal Naresh Shanbhag and Kailash Gopalakrishnan. 2019. Accumulation Bit-Width Scaling For Ultra-Low Precision Training Of Deep Networks. arxiv:cs.LG\/1901.06588  Charbel Sakr Naigang Wang Chia-Yu Chen Jungwook Choi Ankur Agrawal Naresh Shanbhag and Kailash Gopalakrishnan. 2019. Accumulation Bit-Width Scaling For Ultra-Low Precision Training Of Deep Networks. arxiv:cs.LG\/1901.06588"},{"key":"e_1_3_2_1_47_1","unstructured":"Charbel Sakr Naigang Wang Chia-Yu Chen Jungwook Choi Ankur Agrawal Naresh Shanbhag and Kailash Gopalakrishnan. 2019. Accumulation Bit-Width Scaling For Ultra-Low Precision Training Of Deep Networks. arxiv:cs.LG\/1901.06588  Charbel Sakr Naigang Wang Chia-Yu Chen Jungwook Choi Ankur Agrawal Naresh Shanbhag and Kailash Gopalakrishnan. 2019. Accumulation Bit-Width Scaling For Ultra-Low Precision Training Of Deep Networks. arxiv:cs.LG\/1901.06588"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322255"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196072"},{"key":"e_1_3_2_1_50_1","volume-title":"Bit Fusion: Bit-Level Dynamically Composable Architecture for Accelerating Deep Neural Network","author":"Sharma Hardik","year":"2018","unstructured":"Hardik Sharma , Jongse Park , Naveen Suda , Liangzhen Lai , Benson Chau , Vikas Chandra , and Hadi Esmaeilzadeh . 2018 . Bit Fusion: Bit-Level Dynamically Composable Architecture for Accelerating Deep Neural Network . In ISCA. IEEE Computer Society , 764\u2013775. Hardik Sharma, Jongse Park, Naveen Suda, Liangzhen Lai, Benson Chau, Vikas Chandra, and Hadi Esmaeilzadeh. 2018. Bit Fusion: Bit-Level Dynamically Composable Architecture for Accelerating Deep Neural Network. In ISCA. IEEE Computer Society, 764\u2013775."},{"key":"e_1_3_2_1_51_1","volume-title":"2015 International Conference on Advances in Computing, Communications and Informatics, ICACCI 2015","author":"R.","year":"2015","unstructured":"Jitesh\u00a0 R. Shinde and Suresh\u00a0S. Salankar. 2015. VLSI implementation of bit serial architecture based multiplier in floating point arithmetic . In 2015 International Conference on Advances in Computing, Communications and Informatics, ICACCI 2015 , Kochi, India , August 10-13, 2015 . IEEE, 1672\u20131677. https:\/\/doi.org\/10.1109\/ICACCI.2015.7275854 10.1109\/ICACCI.2015.7275854 Jitesh\u00a0R. Shinde and Suresh\u00a0S. Salankar. 2015. VLSI implementation of bit serial architecture based multiplier in floating point arithmetic. In 2015 International Conference on Advances in Computing, Communications and Informatics, ICACCI 2015, Kochi, India, August 10-13, 2015. IEEE, 1672\u20131677. https:\/\/doi.org\/10.1109\/ICACCI.2015.7275854"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080244"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.5555\/3327757.3327866"},{"key":"e_1_3_2_1_54_1","unstructured":"Shibo Wang and Pankaj Kanwar. 2019. BFloat16: The secret to high performance on Cloud TPUs. https:\/\/cloud.google.com\/blog\/products\/ai-machine-learning\/bfloat16-the-secret-to-high-performance-on-cloud-tpus  Shibo Wang and Pankaj Kanwar. 2019. BFloat16: The secret to high performance on Cloud TPUs. https:\/\/cloud.google.com\/blog\/products\/ai-machine-learning\/bfloat16-the-secret-to-high-performance-on-cloud-tpus"},{"key":"e_1_3_2_1_55_1","unstructured":"Zelun Wang and Jyh-Charn Liu. 2019. Translating Math Formula Images to LaTeX Sequences Using Deep Neural Networks with Sequence-level Training. arxiv:cs.LG\/1908.11415  Zelun Wang and Jyh-Charn Liu. 2019. Translating Math Formula Images to LaTeX Sequences Using Deep Neural Networks with Sequence-level Training. arxiv:cs.LG\/1908.11415"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654926"},{"key":"e_1_3_2_1_57_1","unstructured":"Zhilin Yang Zihang Dai Yiming Yang Jaime Carbonell Ruslan Salakhutdinov and Quoc\u00a0V. Le. 2019. XLNet: Generalized Autoregressive Pretraining for Language Understanding. arxiv:cs.CL\/1906.08237  Zhilin Yang Zihang Dai Yiming Yang Jaime Carbonell Ruslan Salakhutdinov and Quoc\u00a0V. Le. 2019. XLNet: Generalized Autoregressive Pretraining for Language Understanding. arxiv:cs.CL\/1906.08237"},{"key":"e_1_3_2_1_58_1","volume-title":"Advances in Neural Information Processing Systems 32, H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d'Alch\u00e9-Buc, E.\u00a0Fox, and R.\u00a0Garnett (Eds.). Curran Associates","author":"Zellers Rowan","unstructured":"Rowan Zellers , Ari Holtzman , Hannah Rashkin , Yonatan Bisk , Ali Farhadi , Franziska Roesner , and Yejin Choi . 2019. Defending Against Neural Fake News . In Advances in Neural Information Processing Systems 32, H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d'Alch\u00e9-Buc, E.\u00a0Fox, and R.\u00a0Garnett (Eds.). Curran Associates , Inc ., 9054\u20139065. http:\/\/papers.nips.cc\/paper\/9106-defending-against-neural-fake-news.pdf Rowan Zellers, Ari Holtzman, Hannah Rashkin, Yonatan Bisk, Ali Farhadi, Franziska Roesner, and Yejin Choi. 2019. Defending Against Neural Fake News. In Advances in Neural Information Processing Systems 32, H.\u00a0Wallach, H.\u00a0Larochelle, A.\u00a0Beygelzimer, F.\u00a0d'Alch\u00e9-Buc, E.\u00a0Fox, and R.\u00a0Garnett (Eds.). Curran Associates, Inc., 9054\u20139065. http:\/\/papers.nips.cc\/paper\/9106-defending-against-neural-fake-news.pdf"},{"key":"e_1_3_2_1_59_1","volume-title":"Cambricon-X: An Accelerator for Sparse Neural Networks. In Intl\u2019 Symp. on Microarchitecture.","author":"Zhang Shijin","year":"2016","unstructured":"Shijin Zhang , Zidong Du , Lei Zhang , Huiying Lan , Shaoli Liu , Ling Li , Qi Guo , Tianshi Chen , and Yunji Chen . 2016 . Cambricon-X: An Accelerator for Sparse Neural Networks. In Intl\u2019 Symp. on Microarchitecture. Shijin Zhang, Zidong Du, Lei Zhang, Huiying Lan, Shaoli Liu, Ling Li, Qi Guo, Tianshi Chen, and Yunji Chen. 2016. Cambricon-X: An Accelerator for Sparse Neural Networks. In Intl\u2019 Symp. on Microarchitecture."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00011"}],"event":{"name":"MICRO '21: 54th Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Virtual Event Greece","acronym":"MICRO '21","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"]},"container-title":["MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3466752.3480106","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3466752.3480106","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:56Z","timestamp":1750191536000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3466752.3480106"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":60,"alternative-id":["10.1145\/3466752.3480106","10.1145\/3466752"],"URL":"https:\/\/doi.org\/10.1145\/3466752.3480106","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}