{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:45:22Z","timestamp":1772725522067,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,12]],"date-time":"2024-08-12T00:00:00Z","timestamp":1723420800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"the Swedish Research Council","award":["2022.03062"],"award-info":[{"award-number":["2022.03062"]}]},{"name":"the European High Performance Computing Joint Undertaking (JU) and Sweden, Finland, Germany, Greece, France, Slovenia, Spain, and the Czech Republic","award":["101093261"],"award-info":[{"award-number":["101093261"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,12]]},"DOI":"10.1145\/3673038.3673110","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T18:29:01Z","timestamp":1723141741000},"page":"199-209","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["Harnessing Integrated CPU-GPU System Memory for HPC: a first look into Grace Hopper"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-6504-7109","authenticated-orcid":false,"given":"Gabin","family":"Schieffer","sequence":"first","affiliation":[{"name":"KTH Royal Institute of Technology, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1669-7714","authenticated-orcid":false,"given":"Jacob","family":"Wahlgren","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5541-433X","authenticated-orcid":false,"given":"Jie","family":"Ren","sequence":"additional","affiliation":[{"name":"William &amp; Mary, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7733-6229","authenticated-orcid":false,"given":"Jennifer","family":"Faj","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4158-3583","authenticated-orcid":false,"given":"Ivy","family":"Peng","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Sweden"}]}],"member":"320","published-online":{"date-parts":[[2024,8,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037706"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00023"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636480.3637284"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368826.3377922"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCHPC49590.2019.00014"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00043"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/e-Science58273.2023.10254803"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322224"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00054"},{"key":"e_1_3_2_1_11_1","volume-title":"Comparing managed memory and ats with and without prefetching on nvidia volta gpus. In 2019 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)","author":"Gayatri Rahulkumar","unstructured":"Rahulkumar Gayatri, Kevin Gott, and Jack Deslippe. 2019. Comparing managed memory and ats with and without prefetching on nvidia volta gpus. In 2019 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS). IEEE, 41\u201346."},{"key":"e_1_3_2_1_12_1","volume-title":"Early-Adaptor: An Adaptive Framework for Proactive UVM Memory Management. In 2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, 248\u2013258","author":"Go Seokjin","year":"2023","unstructured":"Seokjin Go, Hyunwuk Lee, Junsung Kim, Jiwon Lee, Myung\u00a0Kuk Yoon, and Won\u00a0Woo Ro. 2023. Early-Adaptor: An Adaptive Framework for Proactive UVM Memory Management. In 2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, 248\u2013258."},{"key":"e_1_3_2_1_13_1","volume-title":"CachedArrays: Optimizing Data Movement for Heterogeneous Memory Systems. 38th IEEE International Parallel and Distributed Processing Symposium (IPDPS).","author":"Hildebrand Mark","year":"2024","unstructured":"Mark Hildebrand, Jason Lowe-Power, and Venkatesh Akella. 2024. CachedArrays: Optimizing Data Movement for Heterogeneous Memory Systems. 38th IEEE International Parallel and Distributed Processing Symposium (IPDPS)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378529"},{"key":"e_1_3_2_1_15_1","unstructured":"Junjie Li Yinzhi Wang Xiao Liang and Hang Liu. 2024. Automatic BLAS Offloading on Unified Memory Architecture: A Study on NVIDIA Grace-Hopper. In Practice and Experience in Advanced Research Computing (PEARC\u201924)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2022.12.004"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389705"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582063"},{"key":"e_1_3_2_1_19_1","unstructured":"Nvidia. 2019. Unified Memory. https:\/\/devblogs.nvidia.com\/unified-memory-in-cuda-6\/."},{"key":"e_1_3_2_1_20_1","unstructured":"Nvidia. 2024. NVIDIA Grace Hopper Superchip Architecture Whitepaper. Whitepaper. Nvidia. Accessed 2024-04-04."},{"key":"e_1_3_2_1_21_1","unstructured":"Nvidia. 2024. NVIDIA Grace Performance Tuning Guide. Technical Report. Nvidia. Accessed 2024-04-01."},{"key":"e_1_3_2_1_22_1","unstructured":"ORNL. 2019. CUDA Unified Memory. https:\/\/www.olcf.ornl.gov\/wp-content\/uploads\/2019\/06\/06_Managed_Memory.pdf."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297663.3310299"},{"key":"e_1_3_2_1_24_1","volume-title":"Exploring application performance on emerging hybrid-memory supercomputers. In 2016 IEEE 18th International Conference on High Performance Computing and Communications","author":"Peng Ivy\u00a0Bo","unstructured":"Ivy\u00a0Bo Peng, Stefano Markidis, Erwin Laure, Gokcen Kestor, and Roberto Gioiosa. 2016. Exploring application performance on emerging hybrid-memory supercomputers. In 2016 IEEE 18th International Conference on High Performance Computing and Communications; IEEE 14th International Conference on Smart City; IEEE 2nd International Conference on Data Science and Systems. IEEE, 473\u2013480."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00036"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.2562110"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460356"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00057"},{"key":"e_1_3_2_1_29_1","volume-title":"ZeRO-Offload: Democratizing Billion-Scale Model Training. In USENIX Annual Technical Conference.","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Reza\u00a0Yazdani Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. 2021. ZeRO-Offload: Democratizing Billion-Scale Model Training. In USENIX Annual Technical Conference."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636480.3637097"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCHPC56545.2022.00007"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607108"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460365"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304024"}],"event":{"name":"ICPP '24: the 53rd International Conference on Parallel Processing","location":"Gotland Sweden","acronym":"ICPP '24"},"container-title":["Proceedings of the 53rd International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673110","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3673038.3673110","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T17:33:21Z","timestamp":1758648801000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673110"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,12]]},"references-count":34,"alternative-id":["10.1145\/3673038.3673110","10.1145\/3673038"],"URL":"https:\/\/doi.org\/10.1145\/3673038.3673110","relation":{},"subject":[],"published":{"date-parts":[[2024,8,12]]},"assertion":[{"value":"2024-08-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}