{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T13:46:05Z","timestamp":1762609565951,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:00:00Z","timestamp":1731369600000},"content-version":"vor","delay-in-days":366,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Department of Energy","award":["DE-AC02-06CH11357"],"award-info":[{"award-number":["DE-AC02-06CH11357"]}]},{"name":"National Science Foundation","award":["2303820,2303064,2247080,2311876,2312673"],"award-info":[{"award-number":["2303820,2303064,2247080,2311876,2312673"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624257","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"1759-1766","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Benchmarking and In-depth Performance Study of Large Language Models on Habana Gaudi Processors"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3008-9133","authenticated-orcid":false,"given":"Chengming","family":"Zhang","sequence":"first","affiliation":[{"name":"Indiana University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9807-7978","authenticated-orcid":false,"given":"Baixi","family":"Sun","sequence":"additional","affiliation":[{"name":"Indiana University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6244-1264","authenticated-orcid":false,"given":"Xiaodong","family":"Yu","sequence":"additional","affiliation":[{"name":"Stevens Institute of Technology, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3516-2192","authenticated-orcid":false,"given":"Zhen","family":"Xie","sequence":"additional","affiliation":[{"name":"Binghamton University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2791-0031","authenticated-orcid":false,"given":"Weijian","family":"Zheng","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7000-4195","authenticated-orcid":false,"given":"Kamil A.","family":"Iskra","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9428-7801","authenticated-orcid":false,"given":"Pete","family":"Beckman","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5422-4497","authenticated-orcid":false,"given":"Dingwen","family":"Tao","sequence":"additional","affiliation":[{"name":"Indiana University, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Language models are few-shot learners. arXiv preprint arXiv:2005.14165","author":"Brown B","year":"2020","unstructured":"Tom\u00a0B Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, 2020. Language models are few-shot learners. 
arXiv preprint arXiv:2005.14165 (2020)."},{"key":"e_1_3_2_2_2_1","volume-title":"Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509","author":"Child Rewon","year":"2019","unstructured":"Rewon Child, Scott Gray, Alec Radford, and Ilya Sutskever. 2019. Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509 (2019)."},{"key":"e_1_3_2_2_3_1","volume-title":"Rethinking attention with performers. arXiv preprint arXiv:2009.14794","author":"Choromanski Krzysztof","year":"2020","unstructured":"Krzysztof Choromanski, Valerii Likhosherstov, David Dohan, Xingyou Song, Andreea Gane, Tamas Sarlos, Peter Hawkins, Jared Davis, Afroz Mohiuddin, Lukasz Kaiser, 2020. Rethinking attention with performers. arXiv preprint arXiv:2009.14794 (2020)."},{"key":"e_1_3_2_2_4_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_5_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_2_6_1","unstructured":"Habana. 2023. Gaudi Training Platform White Paper. https:\/\/www.intel.com\/content\/www\/us\/en\/content-details\/784830\/gaudi-training-platform-white-paper.html."},{"key":"e_1_3_2_2_7_1","unstructured":"Habana. 2023. Habana Custom Kernel. https:\/\/github.com\/HabanaAI\/Habana_Custom_Kernel."},{"key":"e_1_3_2_2_8_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William\u00a0J Dally. 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_2_9_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"e_1_3_2_2_11_1","volume-title":"International conference on machine learning. PMLR, 5156\u20135165","author":"Katharopoulos Angelos","year":"2020","unstructured":"Angelos Katharopoulos, Apoorv Vyas, Nikolaos Pappas, and Fran\u00e7ois Fleuret. 2020. Transformers are rnns: Fast autoregressive transformers with linear attention. In International conference on machine learning. 
PMLR, 5156\u20135165."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2020.2975185"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2022.3148714"},{"key":"e_1_3_2_2_14_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_2_15_1","volume-title":"20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23)","author":"Thorpe John","year":"2023","unstructured":"John Thorpe, Pengzhan Zhao, Jonathan Eyolfson, Yifan Qiao, Zhihao Jia, Minjia Zhang, Ravi Netravali, and Guoqing\u00a0Harry Xu. 2023. Bamboo: Making Preemptible Instances Resilient for Affordable Training of Large { DNNs}. In 20th USENIX Symposium on Networked Systems Design and Implementation (NSDI 23). 497\u2013513."},{"key":"e_1_3_2_2_16_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_17_1","volume-title":"Linformer: Self-attention with linear complexity. arXiv preprint arXiv:2006.04768","author":"Wang Sinong","year":"2020","unstructured":"Sinong Wang, Belinda\u00a0Z Li, Madian Khabsa, Han Fang, and Hao Ma. 2020. Linformer: Self-attention with linear complexity. arXiv preprint arXiv:2006.04768 (2020)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00043"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.11"}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","acronym":"SC-W 2023","location":"Denver CO USA"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624257","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624257","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624257","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:01:38Z","timestamp":1755745298000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624257"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":20,"alternative-id":["10.1145\/3624062.3624257","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624257","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
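The record above is the standard Crossref REST API "work" envelope ("status"/"message-type"/"message") for this SC-W 2023 paper. Below is a minimal sketch of how such a record might be fetched and summarized, assuming the public endpoint https://api.crossref.org/works/<DOI> and the third-party requests library; the mailto address is a placeholder for Crossref's "polite pool" convention, not a value taken from the record.

import requests

DOI = "10.1145/3624062.3624257"  # from the "DOI" field of the record above

# Fetch the work record; Crossref returns the same envelope shown above:
# {"status": "ok", "message-type": "work", ..., "message": {...}}.
# The mailto parameter is optional contact info (placeholder address here).
resp = requests.get(
    f"https://api.crossref.org/works/{DOI}",
    params={"mailto": "you@example.org"},
    timeout=30,
)
resp.raise_for_status()
work = resp.json()["message"]

# Crossref dates are nested "date-parts" arrays: [[year, month, day]].
year = work["issued"]["date-parts"][0][0]

# Some Crossref records omit "given" for an author, hence the .get() calls.
authors = ", ".join(
    f"{a.get('given', '')} {a.get('family', '')}".strip() for a in work["author"]
)

print(f"{authors}. {year}. {work['title'][0]}.")
print(f"In {work['container-title'][0]}, pp. {work['page']}.")
print(f"https://doi.org/{work['DOI']} (cited {work['is-referenced-by-count']} times)")

Run against this record, the summary lists the eight authors, the 2023 workshop proceedings container title, pages 1759-1766, and the citation count of 4 carried in the "is-referenced-by-count" field.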