{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T16:11:06Z","timestamp":1774627866173,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["DEAC02-06CH11357"],"award-info":[{"award-number":["DEAC02-06CH11357"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["0F-60169"],"award-info":[{"award-number":["0F-60169"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2106634"],"award-info":[{"award-number":["2106634"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2106635"],"award-info":[{"award-number":["2106635"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3625549.3658685","type":"proceedings-article","created":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T15:55:29Z","timestamp":1725033329000},"page":"227-239","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":26,"title":["DataStates-LLM: Lazy Asynchronous Checkpointing for Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8200-0148","authenticated-orcid":false,"given":"Avinash","family":"Maurya","sequence":"first","affiliation":[{"name":"Department of Computer Science, Rochester Institute of Technology, Rochester, New York, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1464-729X","authenticated-orcid":false,"given":"Robert","family":"Underwood","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, Illinois, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5034-2880","authenticated-orcid":false,"given":"M. Mustafa","family":"Rafique","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Rochester Institute of Technology, Rochester, New York, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7890-3934","authenticated-orcid":false,"given":"Franck","family":"Cappello","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, Illinois, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0661-7509","authenticated-orcid":false,"given":"Bogdan","family":"Nicolae","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, Illinois, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,8,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"IPDPS'09: International Symposium on Parallel & Distributed Processing. IEEE","author":"Ansel Jason","year":"2009","unstructured":"Jason Ansel, Kapil Arya, and Gene Cooperman. 2009. DMTCP: Transparent checkpointing for cluster computations and the desktop. In IPDPS'09: International Symposium on Parallel & Distributed Processing. IEEE, Rome, Italy, 1--12."},{"key":"e_1_3_2_1_2_1","volume-title":"Canary: Fault-Tolerant FaaS for Stateful Time-Sensitive Applications. In SC22: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE","author":"Arif Moiz","unstructured":"Moiz Arif, Kevin Assogba, and M. Mustafa Rafique. 2022. Canary: Fault-Tolerant FaaS for Stateful Time-Sensitive Applications. In SC22: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, Dallas, TX, USA, 1--16."},{"key":"e_1_3_2_1_3_1","volume-title":"SC'11: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE","author":"Bautista-Gomez Leonardo","year":"2011","unstructured":"Leonardo Bautista-Gomez, Seiji Tsuboi, Dimitri Komatitsch, Franck Cappello, Naoya Maruyama, and Satoshi Matsuoka. 2011. FTI: High performance Fault Tolerance Interface for hybrid systems. In SC'11: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, Seattle, WA, USA, 1--12."},{"key":"e_1_3_2_1_4_1","volume-title":"ICCD'23: Proceedings of the International Conference on Computer Design. IEEE","author":"Chen Menglei","year":"2023","unstructured":"Menglei Chen, Yu Hua, Rong Bai, and Jianming Huang. 2023. A Cost-Efficient Failure-Tolerant Scheme for Distributed DNN Training. In ICCD'23: Proceedings of the International Conference on Computer Design. IEEE, Milan, Italy, 150--157."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2021.3061394"},{"key":"e_1_3_2_1_6_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways. JMLR'23","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al. 2023. Palm: Scaling language modeling with pathways. JMLR'23: Journal of Machine Learning Research 24, 240 (2023), 1--113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_7_1","article-title":"Switch transformers: scaling to trillion parameter models with simple and efficient sparsity. JMLR'22","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: scaling to trillion parameter models with simple and efficient sparsity. JMLR'22: Journal of Machine Learning Research 23, 1, Article 120 (jan 2022), 39 pages.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.softx.2020.100561"},{"key":"e_1_3_2_1_9_1","volume-title":"ISPDC'23: Proceedings of the International Conference on Parallel and Distributed Computing. IEEE","author":"Gossman Mikaila","year":"2023","unstructured":"Mikaila Gossman, Bogdan Nicolae, and Jon Calhoun. 2023. Modeling MultiThreaded Aggregated I\/O for Asynchronous Checkpointing on HPC Systems. In ISPDC'23: Proceedings of the International Conference on Parallel and Distributed Computing. IEEE, Bucharest, Romania, 101--105. https:\/\/hal.inria.fr\/hal-04343661"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/46\/1\/067"},{"key":"e_1_3_2_1_11_1","volume-title":"Deep Residual Learning for Image Recognition. In Conference on Computer Vision and Pattern Recognition (CVPR). IEEE","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Las Vegas, USA, 770--778."},{"key":"e_1_3_2_1_12_1","volume-title":"Unicron: Economizing Self-Healing LLM Training at Scale. arXiv:2401.00134 [cs.DC]","author":"He Tao","year":"2023","unstructured":"Tao He, Xue Li, Zhibin Wang, Kun Qian, Jingbo Xu, Wenyuan Yu, and Jingren Zhou. 2023. Unicron: Economizing Self-Healing LLM Training at Scale. arXiv:2401.00134 [cs.DC]"},{"key":"e_1_3_2_1_13_1","volume-title":"PaPoC'20: The 7th Workshop on Principles and Practice of Consistency for Distributed Data. ACM","author":"Howard Heidi","year":"2020","unstructured":"Heidi Howard and Richard Mortier. 2020. Paxos vs Raft: have we reached consensus on distributed consensus?. In PaPoC'20: The 7th Workshop on Principles and Practice of Consistency for Distributed Data. ACM, Heraklion, Greece, Article 8, 9 pages."},{"key":"e_1_3_2_1_14_1","volume-title":"NeurIPS'19: Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc","author":"Huang Yanping","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Dehao Chen, Mia Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V Le, Yonghui Wu, and zhifeng Chen. 2019. GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism. In NeurIPS'19: Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. d'Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc., Vancouver, Canada."},{"key":"e_1_3_2_1_15_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2017","unstructured":"Diederik P. Kingma and Jimmy Ba. 2017. Adam: A Method for Stochastic Optimization. arXiv:1412.6980 [cs.LG]"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415530"},{"key":"e_1_3_2_1_17_1","unstructured":"PyTorch Lightning. 2023. Welcome to PyTorch Lightning --- PyTorch Lightning 2.1.0 Documentation. https:\/\/lightning.ai\/docs\/pytorch\/stable\/."},{"key":"e_1_3_2_1_18_1","unstructured":"Junyang Lin An Yang Jinze Bai Chang Zhou Le Jiang Xianyan Jia Ang Wang Jie Zhang Yong Li Wei Lin Jingren Zhou and Hongxia Yang. 2022. M6-10T: A Sharing-Delinking Paradigm for Efficient Multi-Trillion Parameter Pretraining. https:\/\/openreview.net\/forum?id=TXqemS7XEH"},{"key":"e_1_3_2_1_19_1","volume-title":"MASCOTS'21: The 29th IEEE International Symposium on the Modeling, Analysis, and Simulation of Computer and Telecommunication Systems. IEEE","author":"Maurya Avinash","year":"2021","unstructured":"Avinash Maurya, Bogdan Nicolae, Mustafa Rafique, Thierry Tonellot, and Franck Cappello. 2021. Towards Efficient I\/O Scheduling for Collaborative Multi-Level Checkpointing. In MASCOTS'21: The 29th IEEE International Symposium on the Modeling, Analysis, and Simulation of Computer and Telecommunication Systems. IEEE, Virtual, Portugal, 1--8. https:\/\/hal.inria.fr\/hal-03344362"},{"key":"e_1_3_2_1_20_1","volume-title":"Towards Efficient Cache Allocation for High-Frequency Checkpointing. In HiPC'22: The 29th IEEE International Conference on High Performance Computing, Data, and Analytics. IEEE","author":"Maurya Avinash","year":"2022","unstructured":"Avinash Maurya, Bogdan Nicolae, M. Mustafa Rafique, Amr M. Elsayed, Thierry Tonellot, and Franck Cappello. 2022. Towards Efficient Cache Allocation for High-Frequency Checkpointing. In HiPC'22: The 29th IEEE International Conference on High Performance Computing, Data, and Analytics. IEEE, Bangalore, India, 262--271."},{"key":"e_1_3_2_1_21_1","volume-title":"GPU-Enabled Asynchronous Multi-level Checkpoint Caching and Prefetching. In HPDC'23: The 32nd International Symposium on High-Performance Parallel and Distributed Computing. ACM","author":"Maurya Avinash","year":"2023","unstructured":"Avinash Maurya, Mustafa Rafique, Thierry Tonellot, Hussain AlSalem, Franck Cappello, and Bogdan Nicolae. 2023. GPU-Enabled Asynchronous Multi-level Checkpoint Caching and Prefetching. In HPDC'23: The 32nd International Symposium on High-Performance Parallel and Distributed Computing. ACM, Orlando, USA, 73--85. https:\/\/hal.inria.fr\/hal-04119928"},{"key":"e_1_3_2_1_22_1","volume-title":"HIPC'23: 30th IEEE International Conference on High Performance Computing, Data, and Analytics. IEEE","author":"Maurya Avinash","year":"2023","unstructured":"Avinash Maurya, M. Mustafa Rafique, Franck Cappello, and Bogdan Nicolae. 2023. Towards Efficient I\/O Pipelines using Accumulated Compression. In HIPC'23: 30th IEEE International Conference on High Performance Computing, Data, and Analytics. IEEE, Goa, India, 256--265."},{"key":"e_1_3_2_1_23_1","unstructured":"Microsoft. 2023. Optimize Checkpoint Performance for Large Models - Azure Machine Learning. https:\/\/learn.microsoft.com\/en-us\/azure\/machine-learning\/reference-checkpoint-performance-for-large-models."},{"key":"e_1_3_2_1_24_1","volume-title":"Fine-Grained DNN Checkpointing. In FAST'21: The 19th USENIX Conference on File and Storage Technologies. USENIX Association","author":"Mohan Jayashree","year":"2021","unstructured":"Jayashree Mohan, Amar Phanishayee, and Vijay Chidambaram. 2021. Check-Freq: Frequent, Fine-Grained DNN Checkpointing. In FAST'21: The 19th USENIX Conference on File and Storage Technologies. USENIX Association, Boston, USA, 203--216."},{"key":"e_1_3_2_1_25_1","volume-title":"DeepFreeze: Towards Scalable Asynchronous Checkpointing of Deep Learning Models. In CCGrid'20: The 20th International Symposium on Cluster, Cloud and Internet Computing. IEEE\/ACM","author":"Nicolae Bogdan","year":"2020","unstructured":"Bogdan Nicolae, Jiali Li, Justin M. Wozniak, George Bosilca, Matthieu Dorier, and Franck Cappello. 2020. DeepFreeze: Towards Scalable Asynchronous Checkpointing of Deep Learning Models. In CCGrid'20: The 20th International Symposium on Cluster, Cloud and Internet Computing. IEEE\/ACM, Melbourne, Australia, 172--181."},{"key":"e_1_3_2_1_26_1","volume-title":"IPDPS'19: IEEE International Parallel and Distributed Processing Symposium. IEEE, Rio de Janeiro, Brazil, 911--920","author":"Nicolae Bogdan","year":"2019","unstructured":"Bogdan Nicolae, Adam Moody, Elsa Gonsiorowski, Kathryn Mohror, and Franck Cappello. 2019. VeloC: Towards High Performance Adaptive Asynchronous Checkpointing at Large Scale. In IPDPS'19: IEEE International Parallel and Distributed Processing Symposium. IEEE, Rio de Janeiro, Brazil, 911--920."},{"key":"e_1_3_2_1_27_1","volume-title":"IPDPS'11: Proceedings of the International Symposium on Parallel and Distributed Processing Workshops and Phd Forum. IEEE","author":"Nukada Akira","year":"2011","unstructured":"Akira Nukada, Hiroyuki Takizawa, and Satoshi Matsuoka. 2011. NVCR: A transparent checkpoint-restart library for NVIDIA CUDA. In IPDPS'11: Proceedings of the International Symposium on Parallel and Distributed Processing Workshops and Phd Forum. IEEE, Anchorage, AK, USA, 104--113."},{"key":"e_1_3_2_1_28_1","volume-title":"CC-GRID'20: The International Symposium on Cluster, Cloud and Internet Computing (CCGRID). IEEE\/ACM","author":"Parasyris Konstantinos","year":"2020","unstructured":"Konstantinos Parasyris, Kai Keller, Leonardo Bautista-Gomez, and Osman Unsal. 2020. Checkpoint restart support for heterogeneous hpc applications. In CC-GRID'20: The International Symposium on Cluster, Cloud and Internet Computing (CCGRID). IEEE\/ACM, Melbourne, Australia, 242--251."},{"key":"e_1_3_2_1_29_1","volume-title":"ICPP'13: The International Conference on Parallel Processing. IEEE","author":"Potluri Sreeram","year":"2013","unstructured":"Sreeram Potluri, Khaled Hamidouche, Akshay Venkatesh, Devendar Bureddy, and Dhabaleswar K Panda. 2013. Efficient inter-node MPI communication using GPUDirect RDMA for InfiniBand clusters with NVIDIA GPUs. In ICPP'13: The International Conference on Parallel Processing. IEEE, Lyon, France, 80--89."},{"key":"e_1_3_2_1_30_1","unstructured":"PyTorch. 2024. Welcome to the TorchSnapshot documentation. https:\/\/pytorch.org\/torchsnapshot\/stable\/."},{"key":"e_1_3_2_1_31_1","unstructured":"PyTorch-Lightning. 2024. AsyncCheckpointIO- PyTorch Lightning. https:\/\/lightning.ai\/docs\/pytorch\/stable\/api\/lightning.pytorch.plugins.io.AsyncCheckpointIO.html."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Samyam Rajbhandari Jeff Rasley Olatunji Ruwase and Yuxiong He. 2020. ZeRO: Memory Optimizations Toward Training Trillion Parameter Models. arXiv:1910.02054 [cs stat]","DOI":"10.1109\/SC41405.2020.00024"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476205"},{"key":"e_1_3_2_1_34_1","volume-title":"KDD'20: The 26th SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, Virtual Event CA USA, 3505--3506","author":"Rasley Jeff","year":"2020","unstructured":"Jeff Rasley, Samyam Rajbhandari, Olatunji Ruwase, and Yuxiong He. 2020. Deep-Speed: System Optimizations Enable Training Deep Learning Models with Over 100 Billion Parameters. In KDD'20: The 26th SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, Virtual Event CA USA, 3505--3506."},{"key":"e_1_3_2_1_35_1","unstructured":"Sebastian Ruder. 2017. An overview of gradient descent optimization algorithms. arXiv:1609.04747 [cs.LG]"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 2003 Linux symposium","volume":"2003","author":"Philip","unstructured":"Philip Schwan et al. 2003. Lustre: Building a file system for 1000-node clusters. In Proceedings of the 2003 Linux symposium, Vol. 2003. Linux symposium, Ontario, Canada, 380--386."},{"key":"e_1_3_2_1_37_1","unstructured":"Mohammad Shoeybi Mostofa Patwary Raul Puri Patrick LeGresley Jared Casper and Bryan Catanzaro. 2020. Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism. arXiv:1909.08053 [cs]"},{"key":"e_1_3_2_1_38_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv:1409.1556 [cs.CV]"},{"key":"e_1_3_2_1_39_1","volume-title":"Initiative: Enabling Large-Scale Scientific Discovery through Sophisticated AI System Technologies. arXiv:2310.04610 [cs]","author":"Song Shuaiwen Leon","year":"2023","unstructured":"Shuaiwen Leon Song, Bonnie Kruft, Minjia Zhang, Conglong Li, Shiyang Chen, et al. 2023. DeepSpeed4Science Initiative: Enabling Large-Scale Scientific Discovery through Sophisticated AI System Technologies. arXiv:2310.04610 [cs]"},{"key":"e_1_3_2_1_40_1","volume-title":"PD-CAT'09: The International Conference on Parallel and Distributed Computing, Applications and Technologies","author":"Takizawa Hiroyuki","unstructured":"Hiroyuki Takizawa, Katsuto Sato, Kazuhiko Komatsu, and Hiroaki Kobayashi. 2009. CheCUDA: A Checkpoint\/Restart Tool for CUDA Applications. In PD-CAT'09: The International Conference on Parallel and Distributed Computing, Applications and Technologies. IEEE, Higashi-Hiroshima, Japan, 408--413."},{"key":"e_1_3_2_1_41_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi et al. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs]"},{"key":"e_1_3_2_1_42_1","volume-title":"Bingsheng He, and Xiaowen Chu.","author":"Wang Yuxin","year":"2023","unstructured":"Yuxin Wang, Shaohuai Shi, Xin He, Zhenheng Tang, Xinglin Pan, Yang Zheng, Xiaoyu Wu, Amelie Chi Zhou, Bingsheng He, and Xiaowen Chu. 2023. Reliable and Efficient In-Memory Fault Tolerance of Large Language Model Pretraining. arXiv:2310.12670 [cs.DC]"},{"key":"e_1_3_2_1_43_1","volume-title":"GEMINI: Fast Failure Recovery in Distributed Training with In-Memory Checkpoints. In SOSP'23: The Proceedings of the 29th Symposium on Operating Systems Principles (SOSP '23)","author":"Wang Zhuang","year":"2023","unstructured":"Zhuang Wang, Zhen Jia, Shuai Zheng, Zhen Zhang, Xinwei Fu, T. S. Eugene Ng, and Yida Wang. 2023. GEMINI: Fast Failure Recovery in Distributed Training with In-Memory Checkpoints. In SOSP'23: The Proceedings of the 29th Symposium on Operating Systems Principles (SOSP '23). ACM, Koblenz, Germany, 364--381."},{"key":"e_1_3_2_1_44_1","volume-title":"Workshop, Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, et al.","year":"2023","unstructured":"BigScience Workshop, Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, et al. 2023. BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. arXiv:2211.05100 [cs]"},{"key":"e_1_3_2_1_45_1","volume-title":"TRANSOM: An Efficient Fault-Tolerant System for Training LLMs. arXiv:2310.10046 [cs.DC]","author":"Wu Baodong","year":"2023","unstructured":"Baodong Wu, Lei Xia, Qingping Li, Kangyu Li, Xu Chen, Yongqiang Guo, Tieyao Xiang, Yuheng Chen, and Shigang Li. 2023. TRANSOM: An Efficient Fault-Tolerant System for Training LLMs. arXiv:2310.10046 [cs.DC]"},{"key":"e_1_3_2_1_46_1","volume-title":"Zixuan Ma, Yufei Xue, Jidong Zhai, Wenguang Chen, Peng Zhang, Yuxiao Dong, and Jie Tang.","author":"Zeng Aohan","year":"2023","unstructured":"Aohan Zeng, Xiao Liu, Zhengxiao Du, Zihan Wang, Hanyu Lai, Ming Ding, Zhuoyi Yang, Yifan Xu, Wendi Zheng, Xiao Xia, Weng Lam Tam, Zixuan Ma, Yufei Xue, Jidong Zhai, Wenguang Chen, Peng Zhang, Yuxiao Dong, and Jie Tang. 2023. GLM-130B: An Open Bilingual Pre-trained Model. arXiv:2210.02414 [cs.CL]"}],"event":{"name":"HPDC '24: 33rd International Symposium on High-Performance Parallel and Distributed Computing","location":"Pisa Italy","acronym":"HPDC '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 33rd International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3625549.3658685","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3625549.3658685","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:50:38Z","timestamp":1750287038000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3625549.3658685"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":46,"alternative-id":["10.1145\/3625549.3658685","10.1145\/3625549"],"URL":"https:\/\/doi.org\/10.1145\/3625549.3658685","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-08-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}