{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:17:21Z","timestamp":1773317841047,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":92,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T00:00:00Z","timestamp":1763164800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000015","name":"DOE U.S. Department of Energy","doi-asserted-by":"publisher","award":["66150, DE-AC05-76RL01830."],"award-info":[{"award-number":["66150, DE-AC05-76RL01830."]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Commonwealth Cyber Initiative (CCI)","award":["HC-3Q24-047"],"award-info":[{"award-number":["HC-3Q24-047"]}]},{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2402940, 2410856, 2417750, 2018631"],"award-info":[{"award-number":["2402940, 2410856, 2417750, 2018631"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759803","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"1127-1144","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Demystifying the Resilience of Large Language Model Inference: An End-to-End Perspective"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7740-6082","authenticated-orcid":false,"given":"Yu","family":"Sun","sequence":"first","affiliation":[{"name":"George Mason University (GMU), Fairfax, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0821-3432","authenticated-orcid":false,"given":"Zachary","family":"Coalson","sequence":"additional","affiliation":[{"name":"Oregon State University, Corvallis, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2626-7865","authenticated-orcid":false,"given":"Shiyang","family":"Chen","sequence":"additional","affiliation":[{"name":"Rutgers University, New Brunswick, Piscataway, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6323-7388","authenticated-orcid":false,"given":"Hang","family":"Liu","sequence":"additional","affiliation":[{"name":"Rutgers University, New Brunswick, Piscataway, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5921-0035","authenticated-orcid":false,"given":"Zhao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Rutgers University, New Brunswick, Piscataway, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4154-7611","authenticated-orcid":false,"given":"Sanghyun","family":"Hong","sequence":"additional","affiliation":[{"name":"Oregon State University, Corvallis, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9721-3982","authenticated-orcid":false,"given":"Bo","family":"Fang","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory (PNNL), Richland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0735-8617","authenticated-orcid":false,"given":"Lishan","family":"Yang","sequence":"additional","affiliation":[{"name":"George Mason University (GMU), Fairfax, 
USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_3_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSRE59848.2023.00052"},{"key":"e_1_3_3_3_4_2","unstructured":"Anthropic. 2024. The Claude 3 Model Family: Opus Sonnet Haiku. https:\/\/api.semanticscholar.org\/CorpusID:270640496"},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607084"},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807670"},{"key":"e_1_3_3_3_7_2","unstructured":"Ron Banner Yury Nahshan and Daniel Soudry. 2019. Post training 4-bit quantization of convolutional networks for rapid-deployment. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_3_3_8_2","first-page":"131","volume-title":"First conference on machine translation","author":"Bojar Ondrej","year":"2016","unstructured":"Ondrej Bojar, Rajen Chatterjee, Christian Federmann, Yvette Graham, Barry Haddow, Matthias Huck, Antonio\u00a0Jimeno Yepes, Philipp Koehn, Varvara Logacheva, Christof Monz, et\u00a0al. 2016. Findings of the 2016 conference on machine translation (wmt16). In First conference on machine translation. Association for Computational Linguistics, 131\u2013198."},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"crossref","unstructured":"Jakub Breier Dirmanto Jap Xiaolu Hou Shivam Bhasin and Yang Liu. 2021. SNIFF: reverse engineering of neural networks with fault attacks. IEEE Transactions on Reliability 71 4 (2021) 1527\u20131539.","DOI":"10.1109\/TR.2021.3105697"},{"key":"e_1_3_3_3_10_2","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00034"},{"key":"e_1_3_3_3_12_2","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique Ponde De\u00a0Oliveira Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman et\u00a0al. 2021. Evaluating large language models trained on code. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2107.03374 (2021)."},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSN48987.2021.00018"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSRE5003.2020.00047"},{"key":"e_1_3_3_3_15_2","unstructured":"Peter Clark Isaac Cowhey Oren Etzioni Tushar Khot Ashish Sabharwal Carissa Schoenick and Oyvind Tafjord. 2018. Think you have solved question answering? try arc the ai2 reasoning challenge. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1803.05457 (2018)."},{"key":"e_1_3_3_3_16_2","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano Christopher Hesse and John Schulman. 2021. Training Verifiers to Solve Math Word Problems. 
arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.14168 (2021)."},{"key":"e_1_3_3_3_17_2","volume-title":"Llama-3.2-8X3B-MOE-Dark-Champion-Instruct-uncensored-abliterated-18.4B","year":"2025","unstructured":"DavidAU. 2025. Llama-3.2-8X3B-MOE-Dark-Champion-Instruct-uncensored-abliterated-18.4B. Hugging Face. https:\/\/huggingface.co\/DavidAU\/Llama-3.2-8X3B-MOE-Dark-Champion-Instruct-uncensored-abliterated-18.4B"},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00041"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER59578.2024.00022"},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2014.6844486"},{"key":"e_1_3_3_3_21_2","unstructured":"William Fedus Barret Zoph and Noam Shazeer. 2022. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. Journal of Machine Learning Research 23 120 (2022) 1\u201339."},{"key":"e_1_3_3_3_22_2","unstructured":"Elias Frantar Saleh Ashkboos Torsten Hoefler and Dan Alistarh. 2022. Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.17323 (2022)."},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2018.00015"},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2018.00015"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"crossref","unstructured":"Markus Freitag and Yaser Al-Onaizan. 2017. Beam search strategies for neural machine translation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1702.01806 (2017).","DOI":"10.18653\/v1\/W17-3207"},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126972"},{"key":"e_1_3_3_3_27_2","unstructured":"Aaron Grattafiori Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Alex Vaughan et\u00a0al. 2024. The llama 3 herd of models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.21783 (2024)."},{"key":"e_1_3_3_3_28_2","unstructured":"Hui Guan Lin Ning Zhen Lin Xipeng Shen Huiyang Zhou and Seung-Hwan Lim. 2019. In-place zero-space memory protection for cnn. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"crossref","unstructured":"Tahmid Hasan Abhik Bhattacharjee Md\u00a0Saiful Islam Kazi Samin Yuan-Fang Li Yong-Bin Kang M\u00a0Sohel Rahman and Rifat Shahriyar. 2021. XL-sum: Large-scale multilingual abstractive summarization for 44 languages. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.13822 (2021).","DOI":"10.18653\/v1\/2021.findings-acl.413"},{"key":"e_1_3_3_3_30_2","unstructured":"Dan Hendrycks Collin Burns Steven Basart Andy Zou Mantas Mazeika Dawn Song and Jacob Steinhardt. 2020. Measuring massive multitask language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2009.03300 (2020)."},{"key":"e_1_3_3_3_31_2","first-page":"497","volume-title":"28th USENIX Security Symposium (USENIX Security 19)","author":"Hong Sanghyun","year":"2019","unstructured":"Sanghyun Hong, Pietro Frigo, Yi\u011fitcan Kaya, Cristiano Giuffrida, and Tudor Dumitra\u0219. 2019. Terminal brain damage: Exposing the graceless degradation in deep neural networks under hardware fault attacks. In 28th USENIX Security Symposium (USENIX Security 19). 497\u2013514."},{"key":"e_1_3_3_3_32_2","unstructured":"Hugging Face. [n. d.]. Generation strategies. Hugging Face Documentation. 
https:\/\/huggingface.co\/docs\/transformers\/generation_strategies"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"crossref","unstructured":"Younis Ibrahim Haibin Wang Man Bai Zhi Liu Jianan Wang Zhiming Yang and Zhengming Chen. 2020. Soft Error Resilience of Deep Residual Networks for Object Recognition. IEEE Access 8 (2020) 19490\u201319503.","DOI":"10.1109\/ACCESS.2020.2968129"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0A Jacobs Michael\u00a0I Jordan Steven\u00a0J Nowlan and Geoffrey\u00a0E Hinton. 1991. Adaptive mixtures of local experts. Neural computation 3 1 (1991) 79\u201387.","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"e_1_3_3_3_35_2","unstructured":"Albert\u00a0Q Jiang Alexandre Sablayrolles Antoine Roux Arthur Mensch Blanche Savary Chris Bamford Devendra\u00a0Singh Chaplot Diego de\u00a0las Casas Emma\u00a0Bou Hanna Florian Bressand et\u00a0al. 2024. Mixtral of experts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.04088 (2024)."},{"key":"e_1_3_3_3_36_2","volume-title":"Statistical methods in epidemiology","author":"Kahn Harold\u00a0A","year":"1989","unstructured":"Harold\u00a0A Kahn and Christopher\u00a0T Sempos. 1989. Statistical methods in epidemiology. Number\u00a012. Monographs in Epidemiology and."},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"crossref","unstructured":"DJSM Katz J Baptista SP Azen and MC Pike. 1978. Obtaining confidence intervals for the risk ratio in cohort studies. Biometrics (1978) 469\u2013474.","DOI":"10.2307\/2530610"},{"key":"e_1_3_3_3_38_2","unstructured":"Raghuraman Krishnamoorthi. 2018. Quantizing deep convolutional networks for efficient inference: A whitepaper. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1806.08342 (2018)."},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00028"},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126964"},{"key":"e_1_3_3_3_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2018.00038"},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3710848.3710870"},{"key":"e_1_3_3_3_43_2","first-page":"74","volume-title":"Text summarization branches out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381."},{"key":"e_1_3_3_3_44_2","unstructured":"Ji Lin Jiaming Tang Haotian Tang Shang Yang Wei-Ming Chen Wei-Chen Wang Guangxuan Xiao Xingyu Dang Chuang Gan and Song Han. 2024. Awq: Activation-aware weight quantization for on-device llm compression and acceleration. Proceedings of Machine Learning and Systems 6 (2024) 87\u2013100."},{"key":"e_1_3_3_3_45_2","unstructured":"Stephanie Lin Jacob Hilton and Owain Evans. 2021. Truthfulqa: Measuring how models mimic human falsehoods. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.07958 (2021)."},{"key":"e_1_3_3_3_46_2","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et\u00a0al. 2024. Deepseek-v3 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.19437 (2024)."},{"key":"e_1_3_3_3_47_2","unstructured":"Haoxuan Liu Vasu Singh Micha\u0142 Filipiuk and Siva Kumar\u00a0Sastry Hari. 2024. ALBERTA: ALgorithm-Based Error Resilience in Transformer Architectures. IEEE Open Journal of the Computer Society (2024)."},{"key":"e_1_3_3_3_48_2","unstructured":"Zhenhua Liu Yunhe Wang Kai Han Wei Zhang Siwei Ma and Wen Gao. 2021. 
Post-training quantization for vision transformer. Advances in Neural Information Processing Systems 34 (2021) 28092\u201328103."},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651349"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/ETS56758.2023.10174239"},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSN-W50199.2020.00014"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSN-W50199.2020.00014"},{"key":"e_1_3_3_3_53_2","unstructured":"Meta and RAAEC. 2024. Meta-Llama-3.1-8B-Instruct-Summarizer. https:\/\/huggingface.co\/raaec\/Meta-Llama-3.1-8B-Instruct-Summarizer. Accessed: 2025-04-14."},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"publisher","DOI":"10.1109\/AERO.2017.7943882"},{"key":"e_1_3_3_3_55_2","unstructured":"Humza Naveed Asad\u00a0Ullah Khan Shi Qiu Muhammad Saqib Saeed Anwar Muhammad Usman Nick Barnes and Ajmal Mian. 2023. A comprehensive overview of large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.06435 (2023)."},{"key":"e_1_3_3_3_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446091"},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2017.12"},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/3650200.3656615"},{"key":"e_1_3_3_3_59_2","first-page":"311","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"crossref","unstructured":"Kexin Pei Yinzhi Cao Junfeng Yang and Suman Jana. 2019. DeepXplore: Automated Whitebox Testing of Deep Learning Systems. Commun. ACM 62 11 (oct 2019) 137\u2013145.","DOI":"10.1145\/3361566"},{"key":"e_1_3_3_3_61_2","unstructured":"Felipe\u00a0Maia Polo Lucas Weber Leshem Choshen Yuekai Sun Gongjun Xu and Mikhail Yurochkin. 2024. tinyBenchmarks: evaluating LLMs with fewer examples. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.14992 (2024)."},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4770"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2124"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3195997"},{"key":"e_1_3_3_3_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655688"},{"key":"e_1_3_3_3_66_2","doi-asserted-by":"crossref","unstructured":"Keisuke Sakaguchi Ronan\u00a0Le Bras Chandra Bhagavatula and Yejin Choi. 2021. Winogrande: An adversarial winograd schema challenge at scale. Commun. ACM 64 9 (2021) 99\u2013106.","DOI":"10.1145\/3474381"},{"key":"e_1_3_3_3_67_2","first-page":"205","volume-title":"Computer Safety, Reliability, and Security: 37th International Conference, SAFECOMP 2018, V\u00e4ster\u00e5s, Sweden, September 19-21, 2018, Proceedings 37","author":"Schorn Christoph","year":"2018","unstructured":"Christoph Schorn, Andre Guntoro, and Gerd Ascheid. 2018. Efficient on-line error detection and mitigation for deep neural network accelerators. In Computer Safety, Reliability, and Security: 37th International Conference, SAFECOMP 2018, V\u00e4ster\u00e5s, Sweden, September 19-21, 2018, Proceedings 37. 
Springer, 205\u2013219."},{"key":"e_1_3_3_3_68_2","unstructured":"Noam Shazeer Azalia Mirhoseini Krzysztof Maziarz Andy Davis Quoc Le Geoffrey Hinton and Jeff Dean. 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1701.06538 (2017)."},{"key":"e_1_3_3_3_69_2","unstructured":"Yuge Shi Brooks Paige Philip Torr et\u00a0al. 2019. Variational mixture-of-experts autoencoders for multi-modal deep generative models. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_3_3_70_2","doi-asserted-by":"crossref","unstructured":"Ajit Singh. 2025. Meta Llama 4: The Future of Multimodal AI. Available at SSRN 5208228 (2025).","DOI":"10.2139\/ssrn.5208228"},{"key":"e_1_3_3_3_71_2","doi-asserted-by":"crossref","unstructured":"Charles\u00a0W Slayman. 2005. Cache and memory error detection correction and reduction techniques for terrestrial servers and workstations. IEEE Transactions on Device and Materials Reliability 5 3 (2005) 397\u2013404.","DOI":"10.1109\/TDMR.2005.856487"},{"key":"e_1_3_3_3_72_2","doi-asserted-by":"crossref","unstructured":"Yu Sun Zhu Zhu Cherish Mulpuru Roberto Gioiosa Zhao Zhang Bo Fang and Lishan Yang. 2025. FT2: First-Token-Inspired Online Fault Tolerance on Critical Layers for Generative Large Language Models. (2025).","DOI":"10.1145\/3731545.3731570"},{"key":"e_1_3_3_3_73_2","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew\u00a0M Dai Anja Hauth Katie Millican et\u00a0al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.11805 (2023)."},{"key":"e_1_3_3_3_74_2","unstructured":"Gemini Team Petko Georgiev Ving\u00a0Ian Lei Ryan Burnell Libin Bai Anmol Gulati Garrett Tanzer Damien Vincent Zhufeng Pan Shibo Wang et\u00a0al. 2024. Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.05530 (2024)."},{"key":"e_1_3_3_3_75_2","unstructured":"TII Team. 2024. The Falcon 3 family of Open Models."},{"key":"e_1_3_3_3_76_2","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180220"},{"key":"e_1_3_3_3_77_2","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807666"},{"key":"e_1_3_3_3_78_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar Aurelien Rodriguez Armand Joulin Edouard Grave and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. arxiv:https:\/\/arXiv.org\/abs\/2302.13971\u00a0[cs.CL]"},{"key":"e_1_3_3_3_79_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_3_80_2","unstructured":"Ben Wang and Aran Komatsuzaki. 2021. GPT-J-6B: A 6 billion parameter autoregressive language model."},{"key":"e_1_3_3_3_81_2","unstructured":"Steven\u00a0Richard Waterhouse. 1998. Classification and regression using mixtures of experts. Ph.\u00a0D. Dissertation. Citeseer."},{"key":"e_1_3_3_3_82_2","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2014.2"},{"key":"e_1_3_3_3_83_2","doi-asserted-by":"crossref","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 
2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837.","DOI":"10.52202\/068431-1800"},{"key":"e_1_3_3_3_84_2","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz et\u00a0al. 2019. Huggingface\u2019s transformers: State-of-the-art natural language processing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1910.03771 (2019)."},{"key":"e_1_3_3_3_85_2","unstructured":"Haoran Xu Young\u00a0Jin Kim Amr Sharaf and Hany\u00a0Hassan Awadalla. 2023. A paradigm shift in machine translation: Boosting translation performance of large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2309.11674 (2023)."},{"key":"e_1_3_3_3_86_2","unstructured":"Runxin Xu Fuli Luo Zhiyuan Zhang Chuanqi Tan Baobao Chang Songfang Huang and Fei Huang. 2021. Raise a child in large language model: Towards effective and generalizable fine-tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2109.05687 (2021)."},{"key":"e_1_3_3_3_87_2","unstructured":"An Yang Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chengyuan Li Dayiheng Liu Fei Huang Haoran Wei et\u00a0al. 2024. Qwen2. 5 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.15115 (2024)."},{"key":"e_1_3_3_3_88_2","doi-asserted-by":"crossref","unstructured":"Lishan Yang Bin Nie Adwait Jog and Evgenia Smirni. 2021. Practical Resilience Analysis of GPGPU Applications in the Presence of Single- and Multi-Bit Faults. IEEE Trans. Comput. 70 1 (2021) 30\u201344.","DOI":"10.1109\/TC.2020.2980541"},{"key":"e_1_3_3_3_89_2","doi-asserted-by":"crossref","unstructured":"Lishan Yang Bin Nie Adwait Jog and Evgenia Smirni. 2021. SUGAR: Speeding Up GPGPU Application Resilience Estimation with Input Sizing. Proceedings of the ACM on Measurement and Analysis of Computing Systems 5 1 (2021) 1\u201329.","DOI":"10.1145\/3447375"},{"key":"e_1_3_3_3_90_2","doi-asserted-by":"crossref","unstructured":"Zhewei Yao Reza Yazdani\u00a0Aminabadi Minjia Zhang Xiaoxia Wu Conglong Li and Yuxiong He. 2022. Zeroquant: Efficient and affordable post-training quantization for large-scale transformers. Advances in Neural Information Processing Systems 35 (2022) 27168\u201327183.","DOI":"10.52202\/068431-1970"},{"key":"e_1_3_3_3_91_2","doi-asserted-by":"crossref","unstructured":"Rowan Zellers Ari Holtzman Yonatan Bisk Ali Farhadi and Yejin Choi. 2019. Hellaswag: Can a machine really finish your sentence? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1905.07830 (2019).","DOI":"10.18653\/v1\/P19-1472"},{"key":"e_1_3_3_3_92_2","unstructured":"Shengyu Zhang Linfeng Dong Xiaoya Li Sen Zhang Xiaofei Sun Shuhe Wang Jiwei Li Runyi Hu Tianwei Zhang Fei Wu et\u00a0al. 2023. Instruction tuning for large language models: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.10792 (2023)."},{"key":"e_1_3_3_3_93_2","unstructured":"Zhu Zhu Yu Sun Dhatri Parakal Bo Fang Steven Farrell Gregory\u00a0H Bauer Brett Bode Ian\u00a0T Foster Michael\u00a0E Papka William Gropp et\u00a0al. 2025. Understanding the Landscape of Ampere GPU Memory Errors. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2508.03513 (2025)."}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. 
Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3712285.3759803","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759803","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759803","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:27:54Z","timestamp":1773253674000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759803"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":92,"alternative-id":["10.1145\/3712285.3759803","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759803","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}