{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:46:55Z","timestamp":1772909215734,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,7,8]],"date-time":"2022-07-08T00:00:00Z","timestamp":1657238400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"national science foundation","award":["2112356"],"award-info":[{"award-number":["2112356"]}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1925764"],"award-info":[{"award-number":["1925764"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,7,8]]},"DOI":"10.1145\/3491418.3530772","type":"proceedings-article","created":{"date-parts":[[2022,7,8]],"date-time":"2022-07-08T16:36:23Z","timestamp":1657298183000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":13,"title":["Benchmarking the Performance of Accelerators on National Cyberinfrastructure Resources for Artificial Intelligence \/ Machine Learning Workloads"],"prefix":"10.1145","author":[{"given":"Abhinand","family":"Nasari","sequence":"first","affiliation":[{"name":"Texas A&amp;M University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hieu","family":"Le","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Richard","family":"Lawrence","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenhua","family":"He","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Yang","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mario","family":"Krell","sequence":"additional","affiliation":[{"name":"Graphcore Inc., USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alex","family":"Tsyplikhin","sequence":"additional","affiliation":[{"name":"Graphcore Inc., USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mahidhar","family":"Tatineni","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tim","family":"Cockerill","sequence":"additional","affiliation":[{"name":"University of Texas at Austin, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lisa","family":"Perez","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dhruva","family":"Chakravorty","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Honggao","family":"Liu","sequence":"additional","affiliation":[{"name":"Texas A&amp;M University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,7,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Practice and Experience in Advanced Research Computing","author":"Lau Michael","unstructured":"Michael Lau , Stuti Trivedi , Zhenhua He , Tri Pham , Lisa Perez , and Dhruva Chakravorty . 2021. Research Cloud Bazaar: A software defined cloud workflow cost management tool . In Practice and Experience in Advanced Research Computing , ACM , Boston MA USA , 1\u20134. DOI:https:\/\/doi.org\/10.1145\/3437359.3465602 10.1145\/3437359.3465602 Michael Lau, Stuti Trivedi, Zhenhua He, Tri Pham, Lisa Perez, and Dhruva Chakravorty. 2021. Research Cloud Bazaar: A software defined cloud workflow cost management tool. In Practice and Experience in Advanced Research Computing, ACM, Boston MA USA, 1\u20134. DOI:https:\/\/doi.org\/10.1145\/3437359.3465602"},{"key":"e_1_3_2_1_2_1","volume-title":"Retrieved","author":"Hooker Sara","year":"2020","unstructured":"Sara Hooker . 2020 . The Hardware Lottery. arXiv:2009.06489 [cs] (September 2020) . Retrieved February 16, 2022 from http:\/\/arxiv.org\/abs\/2009.06489 Sara Hooker. 2020. The Hardware Lottery. arXiv:2009.06489 [cs] (September 2020). Retrieved February 16, 2022 from http:\/\/arxiv.org\/abs\/2009.06489"},{"key":"e_1_3_2_1_3_1","volume-title":"The age of intelligent machines","author":"Kurzweil Ray","unstructured":"Ray Kurzweil . 1990. The age of intelligent machines . MIT Press , Cambridge, Mass . Ray Kurzweil. 1990. The age of intelligent machines. MIT Press, Cambridge, Mass."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/PGEC.1963.263588"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2012.6402918"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2005.251"},{"key":"e_1_3_2_1_8_1","volume-title":"Retrieved","author":"Jia Zhe","year":"2022","unstructured":"Zhe Jia , Marco Maggioni , Benjamin Staiger , and Daniele P. Scarpazza . 2018. Dissecting the NVIDIA Volta GPU Architecture via Microbenchmarking. arXiv:1804.06826 [cs] (April 2018) . Retrieved February 18, 2022 from http:\/\/arxiv.org\/abs\/1804.06826 Zhe Jia, Marco Maggioni, Benjamin Staiger, and Daniele P. Scarpazza. 2018. Dissecting the NVIDIA Volta GPU Architecture via Microbenchmarking. arXiv:1804.06826 [cs] (April 2018). Retrieved February 18, 2022 from http:\/\/arxiv.org\/abs\/1804.06826"},{"key":"e_1_3_2_1_9_1","unstructured":"The Extreme Science and Engineering Discovery Environment (XSEDE). 2021. Retrieved from https:\/\/www.xsede.org\/  The Extreme Science and Engineering Discovery Environment (XSEDE). 2021. Retrieved from https:\/\/www.xsede.org\/"},{"key":"e_1_3_2_1_10_1","unstructured":"National Science Foundation. 2021. Retrieved from https:\/\/beta.nsf.gov\/funding\/opportunities\/cyberinfrastructure-sustained-scientific-innovation-cssi  National Science Foundation. 2021. Retrieved from https:\/\/beta.nsf.gov\/funding\/opportunities\/cyberinfrastructure-sustained-scientific-innovation-cssi"},{"key":"e_1_3_2_1_11_1","unstructured":"COVID-19 HPC Consortium. 2021. Retrieved from https:\/\/covid19-hpc-consortium.org\/  COVID-19 HPC Consortium. 2021. Retrieved from https:\/\/covid19-hpc-consortium.org\/"},{"key":"e_1_3_2_1_12_1","unstructured":"National Science Foundation. 2021. Retrieved from https:\/\/www.cloudbank.org\/  National Science Foundation. 2021. Retrieved from https:\/\/www.cloudbank.org\/"},{"key":"e_1_3_2_1_13_1","volume-title":"https:\/\/github.com\/mlcommons\/","year":"2022","unstructured":"MLPerf , https:\/\/github.com\/mlcommons\/ , 2022 . The MLPerf name and logo are trademarks of MLCommons Association in the United States and other countries. All rights reserved. Unauthorized use is strictly prohibited. See www.mlcommons.org for more information. Results reported are not verified by the MLCommons\u2122 Association. MLPerf, https:\/\/github.com\/mlcommons\/, 2022. The MLPerf name and logo are trademarks of MLCommons Association in the United States and other countries. All rights reserved. Unauthorized use is strictly prohibited. See www.mlcommons.org for more information. Results reported are not verified by the MLCommons\u2122 Association."},{"key":"e_1_3_2_1_14_1","volume-title":"Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), IEEE","author":"He Kaiming","year":"2016","unstructured":"Kaiming He , Xiangyu Zhang , Shaoqing Ren , and Jian Sun . 2016 . Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), IEEE , Las Vegas, NV, USA, 770\u2013778. DOI:https:\/\/doi.org\/10.1109\/CVPR. 2016.90 10.1109\/CVPR.2016.90 Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), IEEE, Las Vegas, NV, USA, 770\u2013778. DOI:https:\/\/doi.org\/10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_15_1","volume-title":"Retrieved","author":"Tan Mingxing","year":"2022","unstructured":"Mingxing Tan and Quoc V. Le . 2020. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. arXiv:1905.11946 [cs, stat] (September 2020) . Retrieved February 18, 2022 from http:\/\/arxiv.org\/abs\/1905.11946 Mingxing Tan and Quoc V. Le. 2020. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. arXiv:1905.11946 [cs, stat] (September 2020). Retrieved February 18, 2022 from http:\/\/arxiv.org\/abs\/1905.11946"},{"key":"e_1_3_2_1_16_1","volume-title":"Retrieved","author":"Howard Andrew G.","year":"2017","unstructured":"Andrew G. Howard , Menglong Zhu , Bo Chen , Dmitry Kalenichenko , Weijun Wang , Tobias Weyand , Marco Andreetto , and Hartwig Adam . 2017 . MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv:1704.04861 [cs] (April 2017) . Retrieved February 18, 2022 from http:\/\/arxiv.org\/abs\/1704.04861 Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv:1704.04861 [cs] (April 2017). Retrieved February 18, 2022 from http:\/\/arxiv.org\/abs\/1704.04861"},{"key":"e_1_3_2_1_17_1","volume-title":"Retrieved","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs] (May 2019) . Retrieved February 18, 2022 from http:\/\/arxiv.org\/abs\/1810.04805 Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv:1810.04805 [cs] (May 2019). Retrieved February 18, 2022 from http:\/\/arxiv.org\/abs\/1810.04805"},{"key":"e_1_3_2_1_18_1","volume-title":"Retrieved","author":"Keskar Nitish Shirish","year":"2017","unstructured":"Nitish Shirish Keskar , Dheevatsa Mudigere , Jorge Nocedal , Mikhail Smelyanskiy , and Ping Tak Peter Tang . 2017 . On Large-Batch Training for Deep Learning: Generalization Gap and Sharp Minima. arXiv:1609.04836 [cs, math] (February 2017) . Retrieved February 16, 2022 from http:\/\/arxiv.org\/abs\/1609.04836 Nitish Shirish Keskar, Dheevatsa Mudigere, Jorge Nocedal, Mikhail Smelyanskiy, and Ping Tak Peter Tang. 2017. On Large-Batch Training for Deep Learning: Generalization Gap and Sharp Minima. arXiv:1609.04836 [cs, math] (February 2017). Retrieved February 16, 2022 from http:\/\/arxiv.org\/abs\/1609.04836"},{"key":"e_1_3_2_1_19_1","volume-title":"Retrieved","author":"Masters Dominic","year":"2018","unstructured":"Dominic Masters and Carlo Luschi . 2018 . Revisiting Small Batch Training for Deep Neural Networks. arXiv:1804.07612 [cs, stat] (April 2018) . Retrieved February 16, 2022 from http:\/\/arxiv.org\/abs\/1804.07612 Dominic Masters and Carlo Luschi. 2018. Revisiting Small Batch Training for Deep Neural Networks. arXiv:1804.07612 [cs, stat] (April 2018). Retrieved February 16, 2022 from http:\/\/arxiv.org\/abs\/1804.07612"},{"key":"e_1_3_2_1_20_1","volume-title":"A100 40GB PCIe Product Brief. (September","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. 2020. A100 40GB PCIe Product Brief. (September 2020 ). Retrieved from https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/a100\/pdf\/A100-PCIE-Prduct-Brief.pdf NVIDIA. 2020. A100 40GB PCIe Product Brief. (September 2020). Retrieved from https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/a100\/pdf\/A100-PCIE-Prduct-Brief.pdf"},{"key":"e_1_3_2_1_21_1","unstructured":"NVIDIA. 2022. NVIDIA RTX. Retrieved from https:\/\/www.nvidia.com\/en-us\/design-visualization\/rtx\/  NVIDIA. 2022. NVIDIA RTX. Retrieved from https:\/\/www.nvidia.com\/en-us\/design-visualization\/rtx\/"},{"key":"e_1_3_2_1_22_1","unstructured":"NVIDIA. 2022. NGC catalog. Retrieved from https:\/\/catalog.ngc.nvidia.com\/containers  NVIDIA. 2022. NGC catalog. Retrieved from https:\/\/catalog.ngc.nvidia.com\/containers"},{"key":"e_1_3_2_1_23_1","unstructured":"https:\/\/github.tamu.edu\/HPRC\/Graphcore_IPU_Benchmarks  https:\/\/github.tamu.edu\/HPRC\/Graphcore_IPU_Benchmarks"},{"key":"e_1_3_2_1_24_1","unstructured":"TAMU HPRC Wiki. 2021. Retrieved Feb 18 2022 from https:\/\/hprc.tamu.edu\/wiki\/  TAMU HPRC Wiki. 2021. Retrieved Feb 18 2022 from https:\/\/hprc.tamu.edu\/wiki\/"},{"key":"e_1_3_2_1_25_1","unstructured":"Graphcore documents. 2022. Retrieved Feb 18 2022 from https:\/\/docs.graphcore.ai\/en\/latest\/  Graphcore documents. 2022. Retrieved Feb 18 2022 from https:\/\/docs.graphcore.ai\/en\/latest\/"},{"key":"e_1_3_2_1_26_1","volume-title":"Retrieved","author":"Russakovsky Olga","year":"2015","unstructured":"Olga Russakovsky , Jia Deng , Hao Su , Jonathan Krause , Sanjeev Satheesh , Sean Ma , Zhiheng Huang , Andrej Karpathy , Aditya Khosla , Michael Bernstein , Alexander C. Berg , and Li Fei-Fei . 2015 . ImageNet Large Scale Visual Recognition Challenge. arXiv:1409.0575 [cs] (January 2015) . Retrieved February 17, 2022 from http:\/\/arxiv.org\/abs\/1409.0575 Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein, Alexander C. Berg, and Li Fei-Fei. 2015. ImageNet Large Scale Visual Recognition Challenge. arXiv:1409.0575 [cs] (January 2015). Retrieved February 17, 2022 from http:\/\/arxiv.org\/abs\/1409.0575"},{"key":"e_1_3_2_1_27_1","unstructured":"NVIDIA. 2022. NGC catalog. Retrieved from https:\/\/catalog.ngc.nvidia.com\/containers  NVIDIA. 2022. NGC catalog. Retrieved from https:\/\/catalog.ngc.nvidia.com\/containers"}],"event":{"name":"PEARC '22: Practice and Experience in Advanced Research Computing","location":"Boston MA USA","acronym":"PEARC '22","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Practice and Experience in Advanced Research Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3491418.3530772","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3491418.3530772","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3491418.3530772","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:30:48Z","timestamp":1750188648000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3491418.3530772"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,8]]},"references-count":27,"alternative-id":["10.1145\/3491418.3530772","10.1145\/3491418"],"URL":"https:\/\/doi.org\/10.1145\/3491418.3530772","relation":{},"subject":[],"published":{"date-parts":[[2022,7,8]]},"assertion":[{"value":"2022-07-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}