{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,27]],"date-time":"2026-06-27T07:47:03Z","timestamp":1782546423383,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":13,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T00:00:00Z","timestamp":1779148800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62502391"],"award-info":[{"award-number":["62502391"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Natural Science Foundation of China","award":["62272394"],"award-info":[{"award-number":["62272394"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,19]]},"DOI":"10.1145\/3801487.3806069","type":"proceedings-article","created":{"date-parts":[[2026,6,27]],"date-time":"2026-06-27T07:05:47Z","timestamp":1782543947000},"page":"327-330","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["wdCP: Windowed Incremental Checkpointing for Efficient and Bounded LLM Recovery"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9404-2103","authenticated-orcid":false,"given":"Wendi","family":"Cheng","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, Shaanxi Province, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7680-1179","authenticated-orcid":false,"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, Shaanxi Province, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5169-1206","authenticated-orcid":false,"given":"Xiaonan","family":"Zhao","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, Shaanxi Province, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6613-3801","authenticated-orcid":false,"given":"Xiaoling","family":"Shu","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, Shaanxi Province, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7661-5915","authenticated-orcid":false,"given":"Jinjiang","family":"Wang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, Shaanxi Province, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5311-5782","authenticated-orcid":false,"given":"Shujie","family":"Han","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, Shaanxi Province, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,27]]},"reference":[{"key":"e_1_3_3_1_2_2","first-page":"2397","volume-title":"Proc. of ICML","author":"Biderman Stella","year":"2023","unstructured":"Stella Biderman, Hailey Schoelkopf, Quentin\u00a0Gregory Anthony, and Herbie Bradley. 2023. Pythia: A suite for analyzing large language models across training and scaling. In Proc. of ICML. 2397\u20132430."},{"key":"e_1_3_3_1_3_2","first-page":"929","volume-title":"Proc. of USENIX NSDI","author":"Eisenman Assaf","year":"2022","unstructured":"Assaf Eisenman, Kiran\u00a0Kumar Matam, Steven Ingram, and Dheevatsa Mudigere. 2022. Check-N-Run: A checkpointing system for training deep learning recommendation models. In Proc. of USENIX NSDI. 929\u2013943."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Hongliang Li Zichen Wang Hairui Zhao and Meng Zhang. 2025. Convergence-aware optimal checkpointing for exploratory deep learning training jobs. Future Generation Computer Systems (2025).","DOI":"10.2139\/ssrn.4744794"},{"key":"e_1_3_3_1_5_2","first-page":"27575","volume-title":"Proc. of ICML","author":"Li Wenshuo","year":"2024","unstructured":"Wenshuo Li, Xinghao Chen, Han Shu, and Yehui Tang. 2024. ExCP: Extreme LLM checkpoint compression via weight-momentum joint shrinking. In Proc. of ICML. 27575\u201327588."},{"key":"e_1_3_3_1_6_2","first-page":"1519","volume-title":"Proc. of USENIX ATC","author":"Lian Xinyu","year":"2025","unstructured":"Xinyu Lian, Sam\u00a0Ade Jacobs, Lev Kurilenko, and Masahiro Tanaka. 2025. Universal checkpointing: A flexible and efficient distributed checkpointing system for large-scale DNN training with reconfigurable parallelism. In Proc. of USENIX ATC. 1519\u20131534."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"crossref","unstructured":"Qingyin Lin Jiangsu Du Rui Li and Zhiguang Chen. 2024. IncrCP: Decomposing and orchestrating incremental checkpoints for effective recommendation model training. Proc. of VLDB Endow. 18 4 (2024) 1049\u20131062.","DOI":"10.14778\/3717755.3717765"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3625549.3658685"},{"key":"e_1_3_3_1_9_2","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun\u00a0Michael Shi and Huang. 2019. Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1906.00091 (2019)."},{"key":"e_1_3_3_1_10_2","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et\u00a0al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_3_1_11_2","first-page":"559","volume-title":"Proc. of USENIX NSDI","author":"Wan Borui","year":"2025","unstructured":"Borui Wan, Mingji Han, Yiyao Sheng, and Yanghua Peng. 2025. ByteCheckpoint: A unified checkpointing system for large foundation model development. In Proc. of USENIX NSDI. 559\u2013578."},{"key":"e_1_3_3_1_12_2","volume-title":"FastPersist: Accelerating Model Checkpointing in Deep Learning","author":"Wang Guanhua","year":"2024","unstructured":"Guanhua Wang, Olatunji Ruwase, Bing Xie, and Yuxiong He. 2024. FastPersist: Accelerating Model Checkpointing in Deep Learning. Technical Report. Microsoft Research. https:\/\/arxiv.org\/abs\/2406.13768 Preprint."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3754598.3754606"},{"key":"e_1_3_3_1_14_2","unstructured":"Peiyuan Zhang Guangtao Zeng Tianduo Wang and Wei Lu. 2024. TinyLlama: An Open-Source Small Language Model. arxiv:https:\/\/arXiv.org\/abs\/2401.02385\u00a0[cs.CL]"}],"event":{"name":"CF '26: Proceedings of the 23rd ACM International Conference on Computing Frontiers","location":"Catania Italy","acronym":"CF '26","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"]},"container-title":["Proceedings of the 23rd ACM International Conference on Computing Frontiers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3801487.3806069","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,27]],"date-time":"2026-06-27T07:07:59Z","timestamp":1782544079000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3801487.3806069"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,19]]},"references-count":13,"alternative-id":["10.1145\/3801487.3806069","10.1145\/3801487"],"URL":"https:\/\/doi.org\/10.1145\/3801487.3806069","relation":{},"subject":[],"published":{"date-parts":[[2026,5,19]]},"assertion":[{"value":"2026-06-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}