{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:16:05Z","timestamp":1773317765642,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":97,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759817","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"2056-2075","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["GPU Lossy Compression for HPC Can Be Versatile and Ultra-Fast"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7370-6766","authenticated-orcid":false,"given":"Yafan","family":"Huang","sequence":"first","affiliation":[{"name":"University of Iowa, Iowa City, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7339-5256","authenticated-orcid":false,"given":"Sheng","family":"Di","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory (ANL), Lemont, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7773-7826","authenticated-orcid":false,"given":"Guanpeng","family":"Li","sequence":"additional","affiliation":[{"name":"University of Florida, Gainesville, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7890-3934","authenticated-orcid":false,"given":"Franck","family":"Cappello","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory (ANL), Lemont, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"[n. d.]. Open Scientific Visualization Datasets. https:\/\/klacansky.com\/open-scivis-datasets\/."},{"key":"e_1_3_3_3_3_2","unstructured":"2020. NVIDIA A100 Tensor Core GPU Architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf."},{"key":"e_1_3_3_3_4_2","unstructured":"2024. Decoding the Enormous Scale of GPT-4: An In-Depth Exploration of the Model\u2019s Size and Abilities. https:\/\/seifeur.com\/chat-gpt-4-data-size\/."},{"key":"e_1_3_3_3_5_2","unstructured":"Marah Abdin Jyoti Aneja Hany Awadalla Ahmed Awadallah Ammar\u00a0Ahmad Awan Nguyen Bach Amit Bahree Arash Bakhtiari Jianmin Bao Harkirat Behl et\u00a0al. 2024. Phi-3 technical report: A highly capable language model locally on your phone. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.14219 (2024)."},{"key":"e_1_3_3_3_6_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3625122"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.678"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"crossref","unstructured":"Ann\u00a0S Almgren John\u00a0B Bell Mike\u00a0J Lijewski Zarija Luki\u0107 and Ethan Van\u00a0Andel. 2013. Nyx: A massively parallel amr code for computational cosmology. The Astrophysical Journal 765 1 (2013) 39.","DOI":"10.1088\/0004-637X\/765\/1\/39"},{"key":"e_1_3_3_3_10_2","unstructured":"A Andonian Q Anthony S Biderman S Black P Gali L Gao E Hallahan J Levy-Kramer C Leahy L Nestler et\u00a0al. [n. d.]. GPT-NeoX: Large Scale Autoregressive Language Modeling in PyTorch 9 2023. URL https:\/\/www. github. com\/eleutherai\/gpt-neox ([n. d.])."},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.1997.582019"},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707280"},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"crossref","unstructured":"Gaurav Bansal Ajith Mascarenhas and Jacqueline\u00a0H Chen. 2015. Direct numerical simulations of autoignition in stratified dimethyl-ether (DME)\/air turbulent mixtures. Combustion and Flame 162 3 (2015) 688\u2013702.","DOI":"10.1016\/j.combustflame.2014.08.021"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"crossref","unstructured":"Edip Baysal Dan\u00a0D Kosloff and John\u00a0WC Sherwood. 1983. Reverse time migration. Geophysics 48 11 (1983) 1514\u20131524.","DOI":"10.1190\/1.1441434"},{"key":"e_1_3_3_3_15_2","unstructured":"Xiao Bi Deli Chen Guanting Chen Shanhuang Chen Damai Dai Chengqi Deng Honghui Ding Kai Dong Qiushi Du Zhe Fu et\u00a0al. 2024. Deepseek llm: Scaling open-source language models with longtermism. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.02954 (2024)."},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3673038.3673097"},{"key":"e_1_3_3_3_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/956750.956761"},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"crossref","unstructured":"Franck Cappello Sheng Di Sihuan Li Xin Liang Ali\u00a0Murat Gok Dingwen Tao Chun\u00a0Hong Yoon Xin-Chuan Wu Yuri Alexeev and Frederic\u00a0T Chong. 2019. Use cases of lossy compression for floating-point data in scientific data sets. The International Journal of High Performance Computing Applications 33 6 (2019) 1201\u20131220.","DOI":"10.1177\/1094342019853336"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00095"},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"crossref","unstructured":"Jacqueline\u00a0H Chen Alok Choudhary Bronis De\u00a0Supinski Matthew DeVries Evatt\u00a0R Hawkes Scott Klasky Wei-Keng Liao Kwan-Liu Ma John Mellor-Crummey Norbert Podhorszki et\u00a0al. 2009. Terascale direct numerical simulations of turbulent combustion using S3D. Computational Science & Discovery 2 1 (2009) 015001.","DOI":"10.1088\/1749-4699\/2\/1\/015001"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"crossref","unstructured":"Miaoqi Chu Jeffrey Li Qingteng Zhang Zhang Jiang Eric\u00a0M Dufresne Alec Sandy Suresh Narayanan and Nicholas Schwarz. 2022. pyXPCSviewer: an open-source interactive tool for X-ray photon correlation spectroscopy visualization and analysis. Synchrotron Radiation 29 4 (2022) 1122\u20131129.","DOI":"10.1107\/S1600577522004830"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"crossref","unstructured":"Xu Chu Ihab\u00a0F Ilyas and Paraschos Koutris. 2016. Distributed data deduplication. Proceedings of the VLDB Endowment 9 11 (2016) 864\u2013875.","DOI":"10.14778\/2983200.2983203"},{"key":"e_1_3_3_3_23_2","volume-title":"NVIDIA White Paper","author":"Compnay NVIDIA","unstructured":"NVIDIA Compnay. [n. d.]. While Paper: NVIDIA DGX-1 With Tesla V100 System Architecture. In NVIDIA White Paper."},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"crossref","unstructured":"Andrew\u00a0W Cook William Cabot and Paul\u00a0L Miller. 2004. The mixing transition in Rayleigh\u2013Taylor instability. Journal of Fluid Mechanics 511 (2004) 333\u2013362.","DOI":"10.1017\/S0022112004009681"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2016.11"},{"key":"e_1_3_3_3_26_2","unstructured":"Sheng Di Jinyang Liu Kai Zhao Xin Liang Robert Underwood Zhaorui Zhang Milan Shah Yafan Huang Jiajun Huang Xiaodong Yu et\u00a0al. 2024. A survey on error-bounded lossy compression for scientific datasets. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.02840 (2024)."},{"key":"e_1_3_3_3_27_2","unstructured":"Alex Fallin Noushin Azami Sheng Di Franck Cappello and Martin Burtscher. [n. d.]. Fast and Effective Lossy Compression on GPUs and CPUs with Guaranteed Error Bounds. ([n. d.])."},{"key":"e_1_3_3_3_28_2","unstructured":"Alex Fallin and Martin Burtscher. 2024. Lessons Learned on the Path to Guaranteeing the Error Bound in Lossy Quantizers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.15037 (2024)."},{"key":"e_1_3_3_3_29_2","first-page":"47","volume-title":"SC\u201904: Proceedings of the 2004 ACM\/IEEE conference on Supercomputing","author":"Fan Zhe","year":"2004","unstructured":"Zhe Fan, Feng Qiu, Arie Kaufman, and Suzanne Yoakum-Stover. 2004. GPU cluster for high performance computing. In SC\u201904: Proceedings of the 2004 ACM\/IEEE conference on Supercomputing. IEEE, 47\u201347."},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/1058129.1058148"},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00095"},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672233"},{"key":"e_1_3_3_3_33_2","unstructured":"Leo Gao Stella Biderman Sid Black Laurence Golding Travis Hoppe Charles Foster Jason Phang Horace He Anish Thite Noa Nabeshima et\u00a0al. 2020. The Pile: An 800GB Dataset of Diverse Text for Language Modeling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2101.00027 (2020)."},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"crossref","unstructured":"Ray\u00a0W Grout A Gruber H Kolla P-T Bremer JC Bennett A Gyulassy and JH Chen. 2012. A direct numerical simulation study of turbulence and flame structure in transverse jets analysed in jet-trajectory based coordinates. Journal of Fluid Mechanics 706 (2012) 351\u2013383.","DOI":"10.1017\/jfm.2012.257"},{"key":"e_1_3_3_3_35_2","doi-asserted-by":"crossref","unstructured":"Fan Guo Hui Li William Daughton and Yi-Hsin Liu. 2014. Formation of hard power laws in the energetic particle spectra resulting from relativistic magnetic reconnection. Physical Review Letters 113 15 (2014) 155005.","DOI":"10.1103\/PhysRevLett.113.155005"},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2504566"},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304619"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3650200.3656636"},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00110"},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00021"},{"key":"e_1_3_3_3_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3712285.3759814"},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607048"},{"key":"e_1_3_3_3_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid57682.2023.00066"},{"key":"e_1_3_3_3_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00044"},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00021"},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"crossref","unstructured":"Jennifer\u00a0E Kay Clara Deser A Phillips A Mai Cecile Hannay Gary Strand Julie\u00a0Michelle Arblaster SC Bates Gokhan Danabasoglu James Edwards et\u00a0al. 2015. The Community Earth System Model (CESM) large ensemble project: A community resource for studying climate change in the presence of internal climate variability. Bulletin of the American Meteorological Society 96 8 (2015) 1333\u20131349.","DOI":"10.1175\/BAMS-D-13-00255.1"},{"key":"e_1_3_3_3_47_2","doi-asserted-by":"crossref","unstructured":"Jeongnim Kim Andrew\u00a0D Baczewski Todd\u00a0D Beaudet Anouar Benali M\u00a0Chandler Bennett Mark\u00a0A Berrill Nick\u00a0S Blunt Edgar Josu\u00e9\u00a0Landinez Borda Michele Casula David\u00a0M Ceperley et\u00a0al. 2018. QMCPACK: an open source ab initio quantum Monte Carlo package for the electronic structure of atoms molecules and solids. Journal of Physics: Condensed Matter 30 19 (2018) 195901.","DOI":"10.1088\/1361-648X\/aab9c3"},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"publisher","DOI":"10.1109\/PacificVis53943.2022.00017"},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/DCC50243.2021.00018"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476224"},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00104"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622520"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"crossref","unstructured":"Xin Liang Ben Whitney Jieyang Chen Lipeng Wan Qing Liu Dingwen Tao James Kress David Pugmire Matthew Wolf Norbert Podhorszki et\u00a0al. 2021. Mgard+: Optimizing multilevel methods for error-bounded scientific data reduction. IEEE Trans. Comput. 71 7 (2021) 1522\u20131536.","DOI":"10.1109\/TC.2021.3092201"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"crossref","unstructured":"Guo-Yuan Lien Takemasa Miyoshi Seiya Nishizawa Ryuji Yoshida Hisashi Yashiro Sachiho\u00a0A Adachi Tsuyoshi Yamaura and Hirofumi Tomita. 2017. The near-real-time SCALE-LETKF system: A case of the September 2015 Kanto-Tohoku heavy rainfall. Sola 13 (2017) 1\u20136.","DOI":"10.2151\/sola.2017-001"},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"crossref","unstructured":"Peter Lindstrom. 2014. Fixed-rate compressed floating-point arrays. IEEE transactions on visualization and computer graphics 20 12 (2014) 2674\u20132683.","DOI":"10.1109\/TVCG.2014.2346458"},{"key":"e_1_3_3_3_56_2","volume-title":"Spectral predictors","author":"Lindstrom Peter","year":"2006","unstructured":"Peter Lindstrom, L Ibarria, and J Rossignac. 2006. Spectral predictors. Citeseer."},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01383"},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00019"},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"publisher","DOI":"10.1145\/3651890.3672274"},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"crossref","unstructured":"Zichang Liu Aditya Desai Fangshuo Liao Weitao Wang Victor Xie Zhaozhuo Xu Anastasios Kyrillidis and Anshumali Shrivastava. 2023. Scissorhands: Exploiting the persistence of importance hypothesis for llm kv cache compression at test time. Advances in Neural Information Processing Systems 36 (2023) 52342\u201352364.","DOI":"10.52202\/075280-2279"},{"key":"e_1_3_3_3_61_2","doi-asserted-by":"publisher","DOI":"10.2172\/1222713"},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"crossref","unstructured":"Huizhang Luo Junqi Wang Qing Liu Jieyang Chen Scott Klasky and Norbert Podhorszki. 2022. Zmesh: theories and methods to exploring application characteristics to improve lossy compression ratio for adaptive mesh refinement. IEEE Transactions on Parallel and Distributed Systems 33 12 (2022) 3702\u20133717.","DOI":"10.1109\/TPDS.2022.3168386"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"crossref","unstructured":"Sepideh Maleki and Martin Burtscher. 2018. Automatic hierarchical parallelization of linear recurrences. ACM SIGPLAN Notices 53 2 (2018) 128\u2013138.","DOI":"10.1145\/3296957.3173168"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-69583-4_12"},{"key":"e_1_3_3_3_65_2","unstructured":"Duane Merrill and Michael Garland. 2016. Single-pass parallel prefix scan with decoupled look-back. NVIDIA Tech. Rep. NVR-2016-002 (2016)."},{"key":"e_1_3_3_3_66_2","first-page":"7937","volume-title":"International Conference on Machine Learning","author":"Narayanan Deepak","year":"2021","unstructured":"Deepak Narayanan, Amar Phanishayee, Kaiyu Shi, Xie Chen, and Matei Zaharia. 2021. Memory-efficient pipeline-parallel dnn training. In International Conference on Machine Learning. PMLR, 7937\u20137947."},{"key":"e_1_3_3_3_67_2","unstructured":"Piotr Nawrot Adrian \u0141a\u0144cucki Marcin Chochowski David Tarjan and Edoardo\u00a0M Ponti. 2024. Dynamic memory compression: Retrofitting llms for accelerated inference. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.09636 (2024)."},{"key":"e_1_3_3_3_68_2","doi-asserted-by":"publisher","DOI":"10.1145\/1964179.1964189"},{"key":"e_1_3_3_3_69_2","first-page":"311","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_3_3_70_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2013.17"},{"key":"e_1_3_3_3_71_2","unstructured":"Guanqiao Qu Qiyuan Chen Wei Wei Zheng Lin Xianhao Chen and Kaibin Huang. 2025. Mobile edge intelligence for large language models: A contemporary survey. IEEE Communications Surveys & Tutorials (2025)."},{"key":"e_1_3_3_3_72_2","first-page":"117","volume-title":"Eurographics (State of the Art Reports)","author":"Rodr\u00edguez Marcos\u00a0Balsa","year":"2013","unstructured":"Marcos\u00a0Balsa Rodr\u00edguez, Enrico Gobbetti, Jos\u00e9 Antonio\u00a0Iglesias Guiti\u00e1n, Maxim Makhinya, Fabio Marton, Renato Pajarola, and Susanne\u00a0K Suter. 2013. A Survey of Compressed GPU-Based Direct Volume Rendering.. In Eurographics (State of the Art Reports). 117\u2013136."},{"key":"e_1_3_3_3_73_2","unstructured":"Robert Ross Lee Ward Philip Carns Gary Grider Scott Klasky Quincey Koziol Glenn\u00a0K Lockwood Kathryn Mohror Bradley Settlemyer and Matthew Wolf. 2019. Storage systems and i\/o: Organizing storing and accessing data for scientific discovery. Department of Energy Office of Science Tech. Rep (2019)."},{"key":"e_1_3_3_3_74_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00081"},{"key":"e_1_3_3_3_75_2","unstructured":"Arman Shehabi Alex Hubbard Alex Newkirk Nuoa Lei Md\u00a0Abu\u00a0Bakkar Siddik Billie Holecek Jonathan Koomey Eric Masanet Dale Sartor et\u00a0al. 2024. 2024 United States Data Center Energy Usage Report. (2024)."},{"key":"e_1_3_3_3_76_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00010"},{"key":"e_1_3_3_3_77_2","doi-asserted-by":"publisher","DOI":"10.1145\/3625549.3658691"},{"key":"e_1_3_3_3_78_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS64566.2025.00084"},{"key":"e_1_3_3_3_79_2","doi-asserted-by":"crossref","unstructured":"Sebastian Strempfer Zichao\u00a0Wendy Di Kazutomo Yoshii Yue Cao Qingteng Zhang Eric\u00a0M Dufresne Mathew Cherukara Suresh Narayanan Martin\u00a0V Holt Antonino Miceli et\u00a0al. 2025. Homomorphic data compression for real time photon correlation analysis. Optics Express 33 5 (2025) 12059\u201312070.","DOI":"10.1364\/OE.543404"},{"key":"e_1_3_3_3_80_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2017.115"},{"key":"e_1_3_3_3_81_2","doi-asserted-by":"publisher","DOI":"10.1051\/epjconf\/202429513002"},{"key":"e_1_3_3_3_82_2","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414624"},{"key":"e_1_3_3_3_83_2","doi-asserted-by":"crossref","unstructured":"Marat Valiev Eric\u00a0J Bylaska Niranjan Govind Karol Kowalski Tjerk\u00a0P Straatsma Hubertus Johannes\u00a0Jacobus Van\u00a0Dam Dunyou Wang Jarek Nieplocha Edoardo Apr\u00e0 Theresa\u00a0L Windus et\u00a0al. 2010. NWChem: A comprehensive and scalable open-source solution for large scale molecular simulations. Computer Physics Communications 181 9 (2010) 1477\u20131489.","DOI":"10.1016\/j.cpc.2010.04.018"},{"key":"e_1_3_3_3_84_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_3_85_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD63220.2024.00036"},{"key":"e_1_3_3_3_86_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00055"},{"key":"e_1_3_3_3_87_2","doi-asserted-by":"crossref","unstructured":"Adam Weingram Yuke Li Hao Qi Darren Ng Liuyao Dai and Xiaoyi Lu. 2023. xCCL: A survey of industry-led collective communication libraries for deep learning. Journal of Computer Science and Technology 38 1 (2023) 166\u2013195.","DOI":"10.1007\/s11390-023-2894-6"},{"key":"e_1_3_3_3_88_2","unstructured":"Thomas Wolf Lysandre Debut Victor Sanh Julien Chaumond Clement Delangue Anthony Moi Pierric Cistac Tim Rault R\u00e9mi Louf Morgan Funtowicz et\u00a0al. 2019. Huggingface\u2019s transformers: State-of-the-art natural language processing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1910.03771 (2019)."},{"key":"e_1_3_3_3_89_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356155"},{"key":"e_1_3_3_3_90_2","doi-asserted-by":"publisher","DOI":"10.1145\/2442516.2442539"},{"key":"e_1_3_3_3_91_2","doi-asserted-by":"crossref","unstructured":"PK Yeung DA Donzis and KR Sreenivasan. 2012. Dissipation enstrophy and pressure statistics in turbulence simulations at high Reynolds numbers. Journal of Fluid Mechanics 700 (2012) 5\u201315.","DOI":"10.1017\/jfm.2012.5"},{"key":"e_1_3_3_3_92_2","doi-asserted-by":"publisher","DOI":"10.1145\/3502181.3531473"},{"key":"e_1_3_3_3_93_2","unstructured":"Boyuan Zhang Bo Fang Fanjiang Ye Yida Gu Nathan Tallent Guangming Tan and Dingwen Tao. 2024. Overcoming memory constraints in quantum circuit simulation with a high-fidelity compression framework. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.14088 (2024)."},{"key":"e_1_3_3_3_94_2","doi-asserted-by":"crossref","unstructured":"Boyuan Zhang Yafan Huang Sheng Di Fengguang Song Guanpeng Li and Franck Cappello. 2025. Pushing the Limits of GPU Lossy Compression: A Hierarchical Delta Approach. (2025).","DOI":"10.1145\/3721145.3725743"},{"key":"e_1_3_3_3_95_2","doi-asserted-by":"publisher","DOI":"10.1145\/3588195.3592994"},{"key":"e_1_3_3_3_96_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData50022.2020.9378449"},{"key":"e_1_3_3_3_97_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-07312-0_1"},{"key":"e_1_3_3_3_98_2","doi-asserted-by":"publisher","DOI":"10.23919\/ISC.2024.10528931"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759817","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:24:15Z","timestamp":1773253455000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759817"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":97,"alternative-id":["10.1145\/3712285.3759817","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759817","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}