{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:52:17Z","timestamp":1771951937119,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3650200.3656608","type":"proceedings-article","created":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T14:11:54Z","timestamp":1717423914000},"page":"26-37","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Shared Virtual Memory: Its Design and Performance Implications for Diverse Applications"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-0375-9418","authenticated-orcid":false,"given":"Bennett","family":"Cooper","sequence":"first","affiliation":[{"name":"Clemson University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7234-5743","authenticated-orcid":false,"given":"Thomas RW","family":"Scogland","sequence":"additional","affiliation":[{"name":"Lawrence Livermore National Lab, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2218-3675","authenticated-orcid":false,"given":"Rong","family":"Ge","sequence":"additional","affiliation":[{"name":"Clemson University, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,3]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Demystifying GPU UVM Cost with Deep Runtime and Workload Analysis. In 2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 141\u2013150","author":"Allen Tyler","year":"2021","unstructured":"Tyler Allen and Rong Ge. 2021. Demystifying GPU UVM Cost with Deep Runtime and Workload Analysis. In 2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 141\u2013150."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3480855"},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. 2023. AMD ROCm\u2122 documentation. https:\/\/rocm.docs.amd.com\/en\/latest\/"},{"key":"e_1_3_2_1_4_1","unstructured":"AMD. 2023. rocBLAS Documentation. https:\/\/rocblas.readthedocs.io\/en\/master\/index.html"},{"key":"e_1_3_2_1_5_1","unstructured":"AMD. 2024. AMD Instinct\u2122 MI300 Series Accelerators. https:\/\/www.amd.com\/en\/products\/accelerators\/instinct\/mi300.html"},{"key":"e_1_3_2_1_6_1","volume-title":"RAJA: Portable Performance for Large-Scale Scientific Applications. In 2019 IEEE\/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC). 71\u201381","author":"Beckingsale A.","year":"2019","unstructured":"David\u00a0A. Beckingsale, Jason Burmark, Rich Hornung, 2019. RAJA: Portable Performance for Large-Scale Scientific Applications. In 2019 IEEE\/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC). 71\u201381."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/3571885.3571982"},{"key":"e_1_3_2_1_8_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems 33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020), 1877\u20131901."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2014.07.003"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3456727.3463766"},{"key":"e_1_3_2_1_11_1","volume-title":"Performance Evaluation of Advanced Features in CUDA Unified Memory. In 2019 IEEE\/ACM Workshop on Memory Centric High Performance Computing (MCHPC). 50\u201357","author":"Chien Steven","year":"2019","unstructured":"Steven Chien, Ivy Peng, and Stefano Markidis. 2019. Performance Evaluation of Advanced Features in CUDA Unified Memory. In 2019 IEEE\/ACM Workshop on Memory Centric High Performance Computing (MCHPC). 50\u201357."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-022-03805-x"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322224"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Rahulkumar Gayatri Kevin Gott and Jack Deslippe. 2019. Comparing Managed Memory and ATS with and without Prefetching on NVIDIA Volta GPUs. In 2019 IEEE\/ACM Performance Modeling Benchmarking and Simulation of High Performance Computer Systems (PMBS). 41\u201346.","DOI":"10.1109\/PMBS49563.2019.00010"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CSA.2008.11"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1089014.1089021"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","unstructured":"Richard\u00a0D. Hornung and Holger\u00a0E. Hones. 2017. RAJA Performance Suite. [Computer Software] https:\/\/doi.org\/10.11578\/dc.20201001.36. https:\/\/doi.org\/10.11578\/dc.20201001.36","DOI":"10.11578\/dc.20201001.36"},{"key":"e_1_3_2_1_18_1","unstructured":"John Hubbard Gonzalo Brito Chirayu Garg 2023. Simplifying GPU application development with heterogeneous memory management. https:\/\/developer.nvidia.com\/blog\/simplifying-gpu-application-development-with-heterogeneous-memory-management\/"},{"key":"e_1_3_2_1_19_1","unstructured":"John Hubbard and Jerome Glisee. 2017. GPUs: HMM: Heterogeneous Memory Management. https:\/\/www.redhat.com\/files\/summit\/session-assets\/2017\/S104078-hubbard.pdf"},{"key":"e_1_3_2_1_20_1","volume-title":"CLOCK-Pro: An Effective Improvement of the CLOCK Replacement. In 2005 USENIX Annual Technical Conference (USENIX ATC 05)","author":"Jiang Song","year":"2005","unstructured":"Song Jiang, Feng Chen, and Xiaodong Zhang. 2005. CLOCK-Pro: An Effective Improvement of the CLOCK Replacement. In 2005 USENIX Annual Technical Conference (USENIX ATC 05). USENIX Association, Anaheim, CA."},{"key":"e_1_3_2_1_21_1","volume-title":"Evaluating Unified Memory Performance in HIP. In 2022 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW). 562\u2013568","author":"Jin Zheming","year":"2022","unstructured":"Zheming Jin and Jeffrey\u00a0S. Vetter. 2022. Evaluating Unified Memory Performance in HIP. In 2022 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW). 562\u2013568."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575736"},{"key":"e_1_3_2_1_23_1","unstructured":"Khronos Group. 2023. Open Standard for Parallel Programming of Heterogeneous Systems. https:\/\/www.khronos.org\/api\/opencl"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378529"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-019-02966-8"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2014.7040988"},{"key":"e_1_3_2_1_27_1","unstructured":"Lawrence Livermore National Lab. 2022. Tioga. https:\/\/hpc.llnl.gov\/hardware\/compute-platforms\/tioga"},{"key":"e_1_3_2_1_28_1","volume-title":"El Capitan: Preparing for NNSA\u2019s first exascale machine. https:\/\/asc.llnl.gov\/exascale\/el-capitan","author":"Lawrence Livermore National Lab.","year":"2023","unstructured":"Lawrence Livermore National Lab. 2023. El Capitan: Preparing for NNSA\u2019s first exascale machine. https:\/\/asc.llnl.gov\/exascale\/el-capitan"},{"key":"e_1_3_2_1_29_1","volume-title":"BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. arxiv:2211.05100\u00a0[cs.CL]","author":"Le\u00a0Scao Teven","year":"2023","unstructured":"Teven Le\u00a0Scao, Angela Fan, Christopher Akiki, 2023. BLOOM: A 176B-Parameter Open-Access Multilingual Language Model. arxiv:2211.05100\u00a0[cs.CL]"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00044"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304044"},{"key":"e_1_3_2_1_32_1","unstructured":"Linux Kernel Development Community. 2023. Heterogeneous Memory Management (HMM). https:\/\/www.kernel.org\/doc\/html\/latest\/mm\/hmm.html"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589349"},{"key":"e_1_3_2_1_34_1","volume-title":"PyTorch-Direct: Enabling GPU Centric Data Access for Very Large Graph Neural Network Training with Irregular Accesses. CoRR abs\/2101.07956","author":"Min Seungwon","year":"2021","unstructured":"Seungwon Min, Kun Wu, Sitao Huang, 2021. PyTorch-Direct: Enabling GPU Centric Data Access for Very Large Graph Neural Network Training with Irregular Accesses. CoRR abs\/2101.07956 (2021). arXiv:2101.07956"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.14778\/3425879.3425883"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2463813"},{"key":"e_1_3_2_1_37_1","unstructured":"Oak Ridge National Lab. 2022. Frontier User Guide - OLCF User Documentation. https:\/\/docs.olcf.ornl.gov\/systems\/frontier_user_guide.html"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593703"},{"key":"e_1_3_2_1_39_1","volume-title":"2005 Ottawa Linux Symposium","author":"Prasad Vara","year":"2005","unstructured":"Vara Prasad, William Cohen, FC Eigler, 2005. Locating system problems using dynamic instrumentation. In 2005 Ottawa Linux Symposium. New York, NY: IEEE, 49\u201364."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895477"},{"key":"e_1_3_2_1_41_1","unstructured":"Top500. 2023. November 2023. https:\/\/top500.org\/lists\/top500\/2023\/11\/"},{"key":"e_1_3_2_1_42_1","unstructured":"Unified Acceleration Foundation. 2023. oneAPI. https:\/\/www.oneapi.io\/spec\/"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00063"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3196008"},{"key":"e_1_3_2_1_46_1","first-page":"4","article-title":"A quantitative evaluation of unified memory in GPUs","volume":"76","author":"Yu Qi","year":"2019","unstructured":"Qi Yu, Bruce Childers, Libo Huang, 2019. A quantitative evaluation of unified memory in GPUs. The Journal of Supercomputing 76, 4 (nov 2019), 2958\u20132985.","journal-title":"The Journal of Supercomputing"}],"event":{"name":"ICS '24: 2024 International Conference on Supercomputing","location":"Kyoto Japan","acronym":"ICS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 38th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650200.3656608","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3650200.3656608","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:23:38Z","timestamp":1755876218000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650200.3656608"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":46,"alternative-id":["10.1145\/3650200.3656608","10.1145\/3650200"],"URL":"https:\/\/doi.org\/10.1145\/3650200.3656608","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}