{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T14:37:44Z","timestamp":1771339064202,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T00:00:00Z","timestamp":1652659200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"NSF","award":["1901242 and 1910300"],"award-info":[{"award-number":["1901242 and 1910300"]}]},{"name":"IARPA TrojAI","award":["W911NF-19-S-0012"],"award-info":[{"award-number":["W911NF-19-S-0012"]}]},{"name":"ONR","award":["N000141712045, N000141410468 and N000141712947"],"award-info":[{"award-number":["N000141712045, N000141410468 and N000141712947"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,5,16]]},"DOI":"10.1145\/3522664.3528605","type":"proceedings-article","created":{"date-parts":[[2022,10,17]],"date-time":"2022-10-17T16:30:14Z","timestamp":1666024214000},"page":"65-76","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Checkpointing and deterministic training for deep learning"],"prefix":"10.1145","author":[{"given":"Xiangzhe","family":"Xu","sequence":"first","affiliation":[{"name":"Purdue University"}]},{"given":"Hongyu","family":"Liu","sequence":"additional","affiliation":[{"name":"Purdue University"}]},{"given":"Guanhong","family":"Tao","sequence":"additional","affiliation":[{"name":"Purdue University"}]},{"given":"Zhou","family":"Xuan","sequence":"additional","affiliation":[{"name":"Purdue University"}]},{"given":"Xiangyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Purdue University"}]}],"member":"320","published-online":{"date-parts":[[2022,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Wagner","author":"Carlini Nicholas","year":"2016","unstructured":"Nicholas Carlini and David A. Wagner. 2016. Towards Evaluating the Robustness of Neural Networks. CoRR abs\/1608.04644 (2016). arXiv:1608.04644 http:\/\/arxiv.org\/abs\/1608.04644"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_3_1","unstructured":"Duncan Riach. 2019. Deep Learning Determinism. https:\/\/pypi.org\/project\/tensorflow-determinism\/."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1519065.1519083"},{"key":"e_1_3_2_1_5_1","unstructured":"GroupLens. 2017. MovieLens datasets. https:\/\/movielens.org\/."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2909068"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 8th USENIX Conference on Operating Systems Design and Implementation","author":"Guo Zhenyu","year":"2008","unstructured":"Zhenyu Guo, Xi Wang, Jian Tang, Xuezheng Liu, Zhilei Xu, Ming Wu, M. Frans Kaashoek, and Zheng Zhang. 2008. R2: An Application-Level Kernel for Record and Replay. In Proceedings of the 8th USENIX Conference on Operating Systems Design and Implementation (San Diego, California) (OSDI'08). USENIX Association, USA, 193--208."},{"key":"e_1_3_2_1_8_1","unstructured":"Daniel Ho Eric Liang Ion Stoica Pieter Abbeel and Xi Chen. 2019. Population Based Augmentation: Efficient Learning of Augmentation Policy Schedules. In ICML."},{"key":"e_1_3_2_1_9_1","unstructured":"Hugging Face. 2019. Transformers. https:\/\/github.com\/huggingface\/transformers."},{"key":"e_1_3_2_1_10_1","unstructured":"Jennifer Villa Yoav Zimmerman. 2018. Reproducibility in ML: why it matters and how to achieve it. https:\/\/determined.ai\/blog\/reproducibility-in-ml\/."},{"key":"e_1_3_2_1_11_1","unstructured":"Keras. 2020. Callbacks API. https:\/\/keras.io\/api\/callbacks\/."},{"key":"e_1_3_2_1_12_1","unstructured":"Sungbin Lim Ildoo Kim Taesup Kim Chiheon Kim and Sungwoong Kim. 2019. Fast AutoAugment. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2884781.2884784"},{"key":"e_1_3_2_1_14_1","volume-title":"Trojaning Attack on Neural Networks. In 25nd Annual Network and Distributed System Security Symposium, NDSS 2018","author":"Liu Yingqi","year":"2018","unstructured":"Yingqi Liu, Shiqing Ma, Yousra Aafer, Wen-Chuan Lee, Juan Zhai, Weihang Wang, and Xiangyu Zhang. 2018. Trojaning Attack on Neural Networks. In 25nd Annual Network and Distributed System Security Symposium, NDSS 2018, San Diego, California, USA, February 18-221, 2018. The Internet Society."},{"key":"e_1_3_2_1_15_1","unstructured":"Aleksander Madry Aleksandar Makelov Ludwig Schmidt Dimitris Tsipras and Adrian Vladu. 2019. Towards Deep Learning Models Resistant to Adversarial Attacks. arXiv:1706.06083 [stat.ML]"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037751"},{"key":"e_1_3_2_1_17_1","volume-title":"Advanced Compiler Design and Implementation","author":"Muchnick Steven S.","unstructured":"Steven S. Muchnick. 1998. Advanced Compiler Design and Implementation. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA."},{"key":"e_1_3_2_1_18_1","volume-title":"Deterministic implementations for reproducibility in deep reinforcement learning. arXiv preprint arXiv:1809.05676","author":"Nagarajan Prabhat","year":"2018","unstructured":"Prabhat Nagarajan, Garrett Warnell, and Peter Stone. 2018. Deterministic implementations for reproducibility in deep reinforcement learning. arXiv preprint arXiv:1809.05676 (2018)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/1116644.1116674"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3324884.3416545"},{"key":"e_1_3_2_1_21_1","unstructured":"PyTorch. 2018. Deterministic cuDNN flag results in 2x speedup how is this possible? https:\/\/tinyurl.com\/y96yucrb."},{"key":"e_1_3_2_1_22_1","unstructured":"PyTorch. 2019. Reproducibility. https:\/\/pytorch.org\/docs\/stable\/notes\/randomness.html."},{"key":"e_1_3_2_1_23_1","unstructured":"PyTorch. 2020. ImageNet training in PyTorch. https:\/\/github.com\/pytorch\/examples\/tree\/master\/imagenet."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1095810.1095833"},{"key":"e_1_3_2_1_25_1","volume-title":"100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250","author":"Rajpurkar Pranav","year":"2016","unstructured":"Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, and Percy Liang. 2016. Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250 (2016)."},{"key":"e_1_3_2_1_26_1","volume-title":"2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). 180--193","author":"Shalabi Y.","unstructured":"Y. Shalabi, M. Yan, N. Honarmand, R. B. Lee, and J. Torrellas. 2018. Record-Replay Architecture as a General Security Framework. In 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA). 180--193."},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the Annual Conference on USENIX Annual Technical Conference","author":"Srinivasan Sudarshan M.","year":"2004","unstructured":"Sudarshan M. Srinivasan, Srikanth Kandula, Christopher R. Andrews, and Yuanyuan Zhou. 2004. Flashback: A Lightweight Extension for Rollback and Deterministic Replay for Software Debugging. In Proceedings of the Annual Conference on USENIX Annual Technical Conference (Boston, MA) (ATEC '04). USENIX Association, USA, 3."},{"key":"e_1_3_2_1_28_1","unstructured":"Stack Overflow. 2015. Tensorflow NaN bug? https:\/\/stackoverflow.com\/questions\/33712178\/tensorflow-nan-bug\/."},{"key":"e_1_3_2_1_29_1","unstructured":"StackOverflow. 2018. TensorFlow: Are my logits in the right format for cross entropy function? https:\/\/stackoverflow.com\/a\/36086477."},{"key":"e_1_3_2_1_30_1","unstructured":"TensorFlow. 2019. The Model Garden for TensorFlow. https:\/\/github.com\/tensorflow\/models."},{"key":"e_1_3_2_1_31_1","unstructured":"Tensorflow. 2020. ModelCheckpoint. https:\/\/tinyurl.com\/yxqnpr83."},{"key":"e_1_3_2_1_32_1","unstructured":"Tensorflow. 2022. ResNet in Tensorflow. https:\/\/github.com\/tensorflow\/models\/tree\/master\/official\/legacy\/image_classification\/resnet."},{"key":"e_1_3_2_1_33_1","volume-title":"Ensemble adversarial training: Attacks and defenses. arXiv preprint arXiv:1705.07204","author":"Tramer Florian","year":"2017","unstructured":"Florian Tramer, Alexey Kurakin, Nicolas Papernot, Ian Goodfellow, Dan Boneh, and Patrick McDaniel. 2017. Ensemble adversarial training: Attacks and defenses. arXiv preprint arXiv:1705.07204 (2017)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/1294261.1294275"},{"key":"e_1_3_2_1_35_1","volume-title":"2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 1--14","author":"Yan M.","unstructured":"M. Yan, Y. Shalabi, and J. Torrellas. 2016. Replay Confusion: Detecting cache-based covert channel attacks using record and replay. In 2016 49th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 1--14."},{"key":"e_1_3_2_1_36_1","unstructured":"Yangyang Guo. 2020. A pytorch GPU implementation of NCF. https:\/\/github.com\/guoyang9\/NCF."},{"key":"e_1_3_2_1_37_1","volume-title":"Adversarial examples: Attacks and defenses for deep learning","author":"Yuan Xiaoyong","year":"2019","unstructured":"Xiaoyong Yuan, Pan He, Qile Zhu, and Xiaolin Li. 2019. Adversarial examples: Attacks and defenses for deep learning. IEEE transactions on neural networks and learning systems 30, 9 (2019), 2805--2824."},{"key":"e_1_3_2_1_38_1","volume-title":"Laurent El Ghaoui, and Michael I. Jordan","author":"Zhang Hongyang","year":"2019","unstructured":"Hongyang Zhang, Yaodong Yu, Jiantao Jiao, Eric P. Xing, Laurent El Ghaoui, and Michael I. Jordan. 2019. Theoretically Principled Trade-off between Robustness and Accuracy. CoRR abs\/1901.08573 (2019). arXiv:1901.08573 http:\/\/arxiv.org\/abs\/1901.08573"}],"event":{"name":"CAIN '22: 1st Conference on AI Engineering - Software Engineering for AI","location":"Pittsburgh Pennsylvania","acronym":"CAIN '22","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE TCSC IEEE Technical Committee on Scalable Computing"]},"container-title":["Proceedings of the 1st International Conference on AI Engineering: Software Engineering for AI"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3522664.3528605","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3522664.3528605","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3522664.3528605","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:09:34Z","timestamp":1750183774000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3522664.3528605"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,16]]},"references-count":38,"alternative-id":["10.1145\/3522664.3528605","10.1145\/3522664"],"URL":"https:\/\/doi.org\/10.1145\/3522664.3528605","relation":{},"subject":[],"published":{"date-parts":[[2022,5,16]]},"assertion":[{"value":"2022-10-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}