{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T00:05:06Z","timestamp":1780445106936,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T00:00:00Z","timestamp":1750377600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2341039,2114514"],"award-info":[{"award-number":["2341039,2114514"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,21]]},"DOI":"10.1145\/3695053.3731047","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T16:43:11Z","timestamp":1750437791000},"page":"137-152","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Forest: Access-aware GPU UVM Management"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1460-0766","authenticated-orcid":false,"given":"Mao","family":"Lin","sequence":"first","affiliation":[{"name":"University of California, Merced, Merced, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2451-9568","authenticated-orcid":false,"given":"Yuan","family":"Feng","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8292-4554","authenticated-orcid":false,"given":"Guilherme","family":"Cox","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1767-8198","authenticated-orcid":false,"given":"Hyeran","family":"Jeon","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,6,20]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.1996.501191"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00023"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3480855"},{"key":"e_1_3_3_1_5_2","unstructured":"AMD Corporation. Accessed June 2024. Radeons Next-generation Vega Architecture. https:\/\/en.wikichip.org\/w\/images\/a\/a1\/vega-whitepaper.pdf."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","unstructured":"Grant Ayers Heiner Litz Christos Kozyrakis and Parthasarathy Ranganathan. 2020. Classifying Memory Access Patterns for Prefetching(ASPLOS \u201920). Association for Computing Machinery New York NY USA 513\u2013526. 10.1145\/3373376.3378498","DOI":"10.1145\/3373376.3378498"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00021"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Gloria\u00a0B Barrett. 2000. The Coefficient of Determination: Understanding r squared and R squared. The Mathematics Teacher 93 3 (2000) 230\u2013234.","DOI":"10.5951\/MT.93.3.0230"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573485"},{"key":"e_1_3_3_1_10_2","unstructured":"Alexey Bochkovskiy Chien-Yao Wang and Hong-Yuan\u00a0Mark Liao. 2020. Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2004.10934 (2020)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3456727.3463766"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/MCHPC49590.2019.00014"},{"key":"e_1_3_3_1_13_2","first-page":"625","volume-title":"2022 USENIX Annual Technical Conference (USENIX ATC 22)","author":"Choi Sangjin","year":"2022","unstructured":"Sangjin Choi, Taeksoo Kim, Jinwoo Jeong, Rachata Ausavarungnirun, Myeongjae Jeon, Youngjin Kwon, and Jeongseob Ahn. 2022. Memory harvesting in { Multi-GPU} systems with hierarchical unified virtual memory. In 2022 USENIX Annual Technical Conference (USENIX ATC 22). USENIX Association, Carlsbad, CA, 625\u2013638. https:\/\/www.usenix.org\/conference\/atc22\/presentation\/choi-sangjin"},{"key":"e_1_3_3_1_14_2","first-page":"25 \u2013 37","volume-title":"Proceedings of the International Conference on Supercomputing","author":"Cooper Bennett","year":"2024","unstructured":"Bennett Cooper, Thomas R.\u00a0W. Scogland, and Rong Ge. 2024. Shared Virtual Memory: Its Design and Performance Implication for Diverse Applications. In Proceedings of the International Conference on Supercomputing (Kyoto, Japan). 25 \u2013 37."},{"key":"e_1_3_3_1_15_2","unstructured":"NVIDIA Corporation. Accessed Feb 2025. UVM GPU non-replayable faults. https:\/\/github.com\/NVIDIA\/open-gpu-kernel-modules\/blob\/main\/kernel-open\/nvidia-uvm\/uvm_gpu_non_replayable_faults.c."},{"key":"e_1_3_3_1_16_2","unstructured":"NVIDIA Corporation. Accessed February 2025. NVIDIA H100 Tensor Core GPU Architecture. https:\/\/resources.nvidia.com\/en-us-data-center-overview\/gtc22-whitepaper-hopper."},{"key":"e_1_3_3_1_17_2","unstructured":"NVIDIA Corporation. Accessed June 2024. NVIDIA Pascal Architecture. https:\/\/www.nvidia.com\/en-us\/data-center\/pascal-gpu-architecture\/."},{"key":"e_1_3_3_1_18_2","unstructured":"NVIDIA Corporation. Accessed November 2024. NVIDIA Grace Hopper Superchip Architecture Whitepaper. https:\/\/resources.nvidia.com\/en-us-grace-cpu\/nvidia-grace-hopper."},{"key":"e_1_3_3_1_19_2","unstructured":"NVIDIA Corporation. Accessed November 2024. Tree-based Prefetcher for ATS (Grace Hopper). https:\/\/github.com\/NVIDIA\/open-gpu-kernel-modules\/blob\/main\/kernel-open\/nvidia-uvm\/uvm_ats_faults.c#L381-L464."},{"key":"e_1_3_3_1_20_2","unstructured":"NVIDIA Corporation. Accessed October 2024. Table of Technical Specifications per Compute Capability. https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html#features-and-technical-specifications-technical-specifications-per-compute-capability."},{"key":"e_1_3_3_1_21_2","unstructured":"Jacob Devlin. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1810.04805 (2018)."},{"key":"e_1_3_3_1_22_2","unstructured":"Jacob Devlin Ming-Wei Chang Kenton Lee and Kristina Toutanova. 2018. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. CoRR abs\/1810.04805 (2018). arxiv:https:\/\/arXiv.org\/abs\/1810.04805http:\/\/arxiv.org\/abs\/1810.04805"},{"key":"e_1_3_3_1_23_2","unstructured":"Xianzhong Ding Yunkai Zhang Binbin Chen Donghao Ying Tieying Zhang Jianjun Chen Lei Zhang Alberto Cerpa and Wan Du. 2023. Vmr2l: Virtual machines rescheduling using reinforcement learning in data centers."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3689031.3717476"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA59077.2024.00065"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.23919\/DATE51398.2021.9473982"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322224"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00054"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","unstructured":"Amir Gholami Zhewei Yao Sehoon Kim Coleman Hooper Michael\u00a0W. Mahoney and Kurt Keutzer. 2024. AI and Memory Wall. IEEE Micro 44 3 (2024) 33\u201339. 10.1109\/MM.2024.3373763","DOI":"10.1109\/MM.2024.3373763"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS57527.2023.00032"},{"key":"e_1_3_3_1_31_2","unstructured":"Yongbin Gu Wenxuan Wu Yunfan Li and Lizhong Chen. 2020. Uvmbench: A comprehensive benchmark suite for researching unified virtual memory in gpus. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2007.09822 (2020)."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540730"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-38747-0_3"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","unstructured":"Doug Joseph and Dirk Grunwald. 1997. Prefetching using Markov predictors. SIGARCH Comput. Archit. News 25 2 (May 1997) 252\u2013263. 10.1145\/384286.264207","DOI":"10.1145\/384286.264207"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575736"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00021"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00047"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378529"},{"key":"e_1_3_3_1_40_2","unstructured":"Alex Krizhevsky Ilya Sutskever and Geoffrey\u00a0E. Hinton. 2012. ImageNet classification with deep convolutional neural networks. (2012) 1097\u20131105."},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","unstructured":"An-Chow Lai Cem Fide and Babak Falsafi. 2001. Dead-block prediction & dead-block correlating prefetchers. (2001) 144\u2013154. 10.1145\/379240.379259","DOI":"10.1145\/379240.379259"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614269"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071054"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480083"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304044"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356141"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3721146.3721955"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582044"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"crossref","unstructured":"Yechen Liu Timothy Rogers and Clayton Hughes. 2022. Unified Memory: GPGPU-Sim\/UVM Smart Integration. SANDIA REPORT (2022). https:\/\/www.osti.gov\/servlets\/purl\/1844477\/","DOI":"10.2172\/1844477"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","unstructured":"Seung\u00a0Won Min Kun Wu Sitao Huang Mert Hidayeto\u011flu Jinjun Xiong Eiman Ebrahimi Deming Chen and Wen-mei Hwu. 2021. Large graph convolutional network training with GPU-oriented data communication architecture. Proc. VLDB Endow. 14 11 (July 2021) 2087\u20132100. 10.14778\/3476249.3476264","DOI":"10.14778\/3476249.3476264"},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3623782"},{"key":"e_1_3_3_1_52_2","unstructured":"NVIDIA Corporation. Accessed June 2024. NVIDIA Linux open GPU kernel module source. https:\/\/github.com\/NVIDIA\/open-gpu-kernel-modules."},{"key":"e_1_3_3_1_53_2","series-title":"(ICML\u201923)","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML\u201923). JMLR.org, Article 1182, 27\u00a0pages."},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476205"},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA57654.2024.00066"},{"key":"e_1_3_3_1_56_2","unstructured":"Tor\u00a0Aamodt\u2019s research group. Accessed July 2024. GPGPU-sim distribution. https:\/\/github.com\/gpgpu-sim\/gpgpu-sim_distribution\/blob\/master\/configs\/tested-cfgs\/SM75_RTX2060\/gpgpusim.config."},{"key":"e_1_3_3_1_57_2","unstructured":"Nikolay Sakharnykh. Accessed June 2024. Unified memory on pascal and volta. https:\/\/on-demand.gputechconf.com\/gtc\/2017\/presentation\/s7285-nikolay-sakharnykh-unified-memory-on-pascal-and-volta.pdf."},{"key":"e_1_3_3_1_58_2","doi-asserted-by":"crossref","unstructured":"Gabin Schieffer Jacob Wahlgren Jie Ren Jennifer Faj and Ivy Peng. 2024. Harnessing Integrated CPU-GPU System Memory for HPC: a first look into Grace Hopper. International Conference on Parallel Processing (2024).","DOI":"10.1145\/3673038.3673110"},{"key":"e_1_3_3_1_59_2","unstructured":"Accel-Sim team. Accessed October 2024. GPGPU-sim distribution. https:\/\/github.com\/accel-sim\/gpgpu-sim_distribution\/blob\/dev-uvm\/configs\/tested-cfgs\/SM86_RTX3070\/gpgpusim.config."},{"key":"e_1_3_3_1_60_2","unstructured":"Accel-Sim team. Accessed October 2024. GPGPU-sim distribution (Accel-sim). https:\/\/github.com\/accel-sim\/gpgpu-sim_distribution\/blob\/dev-uvm\/configs\/tested-cfgs\/SM7_QV100\/gpgpusim.config."},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"crossref","unstructured":"Mark Weiser. 1984. Program slicing. IEEE Transactions on software engineering4 (1984) 352\u2013357.","DOI":"10.1109\/TSE.1984.5010248"},{"key":"e_1_3_3_1_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322225"},{"key":"e_1_3_3_1_63_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3614309"},{"key":"e_1_3_3_1_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640396"}],"event":{"name":"ISCA '25: Proceedings of the 52nd Annual International Symposium on Computer Architecture","location":"Tokyo Japan","acronym":"SIGARCH '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 52nd Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731047","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3695053.3731047","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T10:57:08Z","timestamp":1750503428000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3695053.3731047"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,20]]},"references-count":63,"alternative-id":["10.1145\/3695053.3731047","10.1145\/3695053"],"URL":"https:\/\/doi.org\/10.1145\/3695053.3731047","relation":{},"subject":[],"published":{"date-parts":[[2025,6,20]]},"assertion":[{"value":"2025-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}