{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T23:14:51Z","timestamp":1776122091733,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,3,9]],"date-time":"2020-03-09T00:00:00Z","timestamp":1583712000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,3,9]]},"DOI":"10.1145\/3373376.3378529","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T22:37:01Z","timestamp":1584139021000},"page":"1357-1370","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":77,"title":["Batch-Aware Unified Memory Management in GPUs for Irregular Workloads"],"prefix":"10.1145","author":[{"given":"Hyojong","family":"Kim","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Jaewoong","family":"Sim","sequence":"additional","affiliation":[{"name":"Intel Labs, Portland, OR, USA"}]},{"given":"Prasun","family":"Gera","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Ramyad","family":"Hadidi","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Hyesoon","family":"Kim","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]}],"member":"320","published-online":{"date-parts":[[2020,3,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Advanced Micro Devices Inc. 2012. AMD Graphics Cores Next (GCN) Architecture . https:\/\/www.amd.com\/Documents\/GCN_Architecture_whitepaper.pdf .  Advanced Micro Devices Inc. 2012. AMD Graphics Cores Next (GCN) Architecture . https:\/\/www.amd.com\/Documents\/GCN_Architecture_whitepaper.pdf ."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) .","author":"Agarwal Neha","unstructured":"Neha Agarwal , David Nellans , Mike O'Connor , Stephen W. Keckler , and Thomas F. Wenisch . 2015a. Unlocking Bandwidth for GPUs in CC-NUMA Systems . In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) . Neha Agarwal, David Nellans, Mike O'Connor, Stephen W. Keckler, and Thomas F. Wenisch. 2015a. Unlocking Bandwidth for GPUs in CC-NUMA Systems . In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) ."},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) .","author":"Agarwal Neha","unstructured":"Neha Agarwal , David Nellans , Mark Stephenson , Mike O'Connor , and Stephen W. Keckler . 2015b. Page Placement Strategies for GPUs Within Heterogeneous Memory Systems . In Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) . Neha Agarwal, David Nellans, Mark Stephenson, Mike O'Connor, and Stephen W. Keckler. 2015b. Page Placement Strategies for GPUs Within Heterogeneous Memory Systems. In Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) ."},{"key":"e_1_3_2_1_4_1","unstructured":"Nikoly Akhenykh. 2017. Unified Memory On Pascal and Volta . http:\/\/on-demand.gputechconf.com\/gtc\/2017\/presentation\/s7285-nikolay-sakharnykh-unified-memory-on-pascal-and-volta.pdf .  Nikoly Akhenykh. 2017. Unified Memory On Pascal and Volta . http:\/\/on-demand.gputechconf.com\/gtc\/2017\/presentation\/s7285-nikolay-sakharnykh-unified-memory-on-pascal-and-volta.pdf ."},{"key":"e_1_3_2_1_5_1","unstructured":"Nikoly Akhenykh. 2018. Everything You Need to Know About Unified Memory . http:\/\/on-demand.gputechconf.com\/gtc\/2018\/presentation\/s8430-everything-you-need-to-know-about-unified-memory.pdf .  Nikoly Akhenykh. 2018. Everything You Need to Know About Unified Memory . http:\/\/on-demand.gputechconf.com\/gtc\/2018\/presentation\/s8430-everything-you-need-to-know-about-unified-memory.pdf ."},{"key":"e_1_3_2_1_6_1","unstructured":"AMD. 2011. AMD Accelerated Processing Units . https:\/\/www.amd.com\/us\/products\/technologies\/apu\/Pages\/apu.aspx .  AMD. 2011. AMD Accelerated Processing Units . https:\/\/www.amd.com\/us\/products\/technologies\/apu\/Pages\/apu.aspx ."},{"key":"e_1_3_2_1_7_1","unstructured":"AMD. 2012. AMD Graphics Cores Next (GCN) Architecture . https:\/\/www.amd.com\/Documents\/GCN_Architecture_whitepaper.pdf .  AMD. 2012. AMD Graphics Cores Next (GCN) Architecture . https:\/\/www.amd.com\/Documents\/GCN_Architecture_whitepaper.pdf ."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123975"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173169"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815970"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1346281.1346286"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540741"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749717"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2009.26"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1736020.1736060"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0169-7552(98)00110-X"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) .","author":"Cong Jason","year":"2017","unstructured":"Jason Cong , Zhenman Fang , Yuchen Hao , and Glenn Reinman . 2017 . Supporting Address Translation for Accelerator-Centric Architectures . In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) . Jason Cong, Zhenman Fang, Yuchen Hao, and Glenn Reinman. 2017. Supporting Address Translation for Accelerator-Centric Architectures. In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) ."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2938369"},{"key":"e_1_3_2_1_19_1","volume-title":"HVD-TLS: A Novel Framework of Thread Level Speculation. In International Conference on Trust, Security and Privacy in Computing and Communications (TrustCom) .","author":"Fan Xu","year":"2012","unstructured":"Xu Fan , Shen Li , and Wang Zhiying . 2012 . HVD-TLS: A Novel Framework of Thread Level Speculation. In International Conference on Trust, Security and Privacy in Computing and Communications (TrustCom) . Xu Fan, Shen Li, and Wang Zhiying. 2012. HVD-TLS: A Novel Framework of Thread Level Speculation. In International Conference on Trust, Security and Privacy in Computing and Communications (TrustCom) ."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322224"},{"key":"e_1_3_2_1_21_1","unstructured":"Google. [n.d.]. Google GPUs Cloud Computing . https:\/\/cloud.google.com\/gpu .  Google. [n.d.]. Google GPUs Cloud Computing . https:\/\/cloud.google.com\/gpu ."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the Symposium on Principles and Practice of Parallel Programming (PPoPP) .","author":"Pascal Grosset Andre Vincent","year":"2011","unstructured":"Andre Vincent Pascal Grosset , Peihong Zhu , Shusen Liu , Suresh Venkatasubramanian , and Mary Hall . 2011 . Evaluating Graph Coloring on GPUs . In Proceedings of the Symposium on Principles and Practice of Parallel Programming (PPoPP) . Andre Vincent Pascal Grosset, Peihong Zhu, Shusen Liu, Suresh Venkatasubramanian, and Mary Hall. 2011. Evaluating Graph Coloring on GPUs. In Proceedings of the Symposium on Principles and Practice of Parallel Programming (PPoPP) ."},{"key":"e_1_3_2_1_23_1","unstructured":"IBM. [n.d.]. GPUs Cloud Computing . https:\/\/www.ibm.com\/cloud\/gpu .  IBM. [n.d.]. GPUs Cloud Computing . https:\/\/www.ibm.com\/cloud\/gpu ."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1137\/0914041"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the International Symposium on Computer Architecture (ISCA) .","author":"Gokul","unstructured":"Gokul B. Kandiraju and Anand Sivasubramaniam. 2002. Going the Distance for TLB Prefetching: An Application-driven Study . In Proceedings of the International Symposium on Computer Architecture (ISCA) . Gokul B. Kandiraju and Anand Sivasubramaniam. 2002. Going the Distance for TLB Prefetching: An Application-driven Study. In Proceedings of the International Symposium on Computer Architecture (ISCA) ."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2731186.2731192"},{"key":"e_1_3_2_1_27_1","unstructured":"Hyesoon Kim Jaekyu Lee Nagesh B. Lakshminarayana Jaewoong Sim Jieun Lim Tri Pho Hyojong Kim and Ramyad Hadidi. 2012. MacSim: A CPU-GPU Heterogeneous Simulation Framework User Guide.  Hyesoon Kim Jaekyu Lee Nagesh B. Lakshminarayana Jaewoong Sim Jieun Lim Tri Pho Hyojong Kim and Ramyad Hadidi. 2012. MacSim: A CPU-GPU Heterogeneous Simulation Framework User Guide."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628075"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304044"},{"key":"e_1_3_2_1_30_1","volume-title":"Enabling Efficient Preemption for SIMT Architectures with Lightweight Context Switching. In International Conference for High Performance Computing, Networking, Storage and Analysis (SC) .","author":"Lin Zhen","year":"2016","unstructured":"Zhen Lin , Lars Nyland , and Huiyang Zhou . 2016 . Enabling Efficient Preemption for SIMT Architectures with Lightweight Context Switching. In International Conference for High Performance Computing, Networking, Storage and Analysis (SC) . Zhen Lin, Lars Nyland, and Huiyang Zhou. 2016. Enabling Efficient Preemption for SIMT Architectures with Lightweight Context Switching. In International Conference for High Performance Computing, Networking, Storage and Analysis (SC) ."},{"key":"e_1_3_2_1_31_1","volume-title":"Speculative Execution on GPU: An Exploratory Study. In International Conference on Parallel Processing (ICPP) .","author":"Liu Shaoshan","year":"2010","unstructured":"Shaoshan Liu , Christine Eisenbeis , and Jean-Luc Gaudiot . 2010 . Speculative Execution on GPU: An Exploratory Study. In International Conference on Parallel Processing (ICPP) . Shaoshan Liu, Christine Eisenbeis, and Jean-Luc Gaudiot. 2010. Speculative Execution on GPU: An Exploratory Study. In International Conference on Parallel Processing (ICPP) ."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2445572.2445574"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.2172\/951102"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2402.322385"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2008.4510742"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) .","author":"Mutlu Onur","unstructured":"Onur Mutlu , Jared Stark , Chris Wilkerson , and Yale N. Patt . 2003. Runahead Execution: An Alternative to Very Large Instruction Windows for Out-of-Order Processors . In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) . Onur Mutlu, Jared Stark, Chris Wilkerson, and Yale N. Patt. 2003. Runahead Execution: An Alternative to Very Large Instruction Windows for Out-of-Order Processors. In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) ."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807626"},{"key":"e_1_3_2_1_38_1","unstructured":"NVIDIA Corp. 2016a. NVIDIA Tesla P100 . https:\/\/images.nvidia.com\/content\/pdf\/tesla\/whitepaper\/pascal-architecture-whitepaper.pdf .  NVIDIA Corp. 2016a. NVIDIA Tesla P100 . https:\/\/images.nvidia.com\/content\/pdf\/tesla\/whitepaper\/pascal-architecture-whitepaper.pdf ."},{"key":"e_1_3_2_1_39_1","unstructured":"NVIDIA Corp. 2016b. NVIDIA Tesla V100 . http:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf .  NVIDIA Corp. 2016b. NVIDIA Tesla V100 . http:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf ."},{"key":"e_1_3_2_1_40_1","unstructured":"NVIDIA Corp. 2017. CUDA Toolkit Documentation . https:\/\/docs.nvidia.com\/cuda\/index.html .  NVIDIA Corp. 2017. CUDA Toolkit Documentation . https:\/\/docs.nvidia.com\/cuda\/index.html ."},{"key":"e_1_3_2_1_41_1","unstructured":"NVIDIA Corp. 2018. NVIDIA Driver Downloads . https:\/\/www.nvidia.com .  NVIDIA Corp. 2018. NVIDIA Driver Downloads . https:\/\/www.nvidia.com ."},{"key":"e_1_3_2_1_42_1","unstructured":"NVIDIA Corp. 2019. NVIDIA Visual Profiler . https:\/\/developer.nvidia.com\/nvidia-visual-profiler .  NVIDIA Corp. 2019. NVIDIA Visual Profiler . https:\/\/developer.nvidia.com\/nvidia-visual-profiler ."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) .","author":"Kyu Park Jason Jong","year":"2015","unstructured":"Jason Jong Kyu Park , Yongjun Park , and Scott Mahlke . 2015 . Chimera: Collaborative Preemption for Multitasking on a Shared GPU . In Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) . Jason Jong Kyu Park, Yongjun Park, and Scott Mahlke. 2015. Chimera: Collaborative Preemption for Multitasking on a Shared GPU. In Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) ."},{"key":"e_1_3_2_1_44_1","unstructured":"PCI-SIG. 2015. PCI Express Base Specification Revision 3.1a .  PCI-SIG. 2015. PCI Express Base Specification Revision 3.1a ."},{"key":"e_1_3_2_1_45_1","unstructured":"Peng Wang. 2017. UNIFIED MEMORY ON P100 . https:\/\/www.olcf.ornl.gov\/wp-content\/uploads\/2018\/02\/SummitDev_Unified-Memory.pdf .  Peng Wang. 2017. UNIFIED MEMORY ON P100 . https:\/\/www.olcf.ornl.gov\/wp-content\/uploads\/2018\/02\/SummitDev_Unified-Memory.pdf ."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541942"},{"key":"e_1_3_2_1_47_1","volume-title":"Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) .","author":"Power Jason","unstructured":"Jason Power , Mark D. Hill , and David A. Wood . 2014. Supporting x86--64 Address Translation for 100s of GPU Lanes . In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) . Jason Power, Mark D. Hill, and David A. Wood. 2014. Supporting x86--64 Address Translation for 100s of GPU Lanes. In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) ."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00025"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2014.6853208"},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) .","author":"Yoon Hongil","unstructured":"Hongil Yoon , Jason Lowe-Power , and Gurindar S. Sohi . 2018. Filtering Translation Bandwidth with Virtual Caching . In Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) . Hongil Yoon, Jason Lowe-Power, and Gurindar S. Sohi. 2018. Filtering Translation Bandwidth with Virtual Caching. In Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS) ."},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) .","author":"Yoon Hongil","unstructured":"Hongil Yoon and Gurindar S. Sohi . 2016. Revisiting virtual L1 caches: A practical design using dynamic synonym remapping . In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) . Hongil Yoon and Gurindar S. Sohi. 2016. Revisiting virtual L1 caches: A practical design using dynamic synonym remapping. In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) ."},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of the International Symposium on Computer Architecture (ISCA) .","author":"Yoon M. K.","unstructured":"M. K. Yoon , K. Kim , S. Lee , W. W. Ro , and M. Annavaram . 2016. Virtual Thread: Maximizing Thread-Level Parallelism beyond GPU Scheduling Limit . In Proceedings of the International Symposium on Computer Architecture (ISCA) . M. K. Yoon, K. Kim, S. Lee, W. W. Ro, and M. Annavaram. 2016. Virtual Thread: Maximizing Thread-Level Parallelism beyond GPU Scheduling Limit. In Proceedings of the International Symposium on Computer Architecture (ISCA) ."},{"key":"e_1_3_2_1_53_1","volume-title":"Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) .","author":"Zheng Tianhao","unstructured":"Tianhao Zheng , David Nellans , Arslan Zulfiqar , Mark Stephenson , and Stephen W. Keckler . 2016. Towards High Performance Paged Memory for GPUs . In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) . Tianhao Zheng, David Nellans, Arslan Zulfiqar, Mark Stephenson, and Stephen W. Keckler. 2016. Towards High Performance Paged Memory for GPUs . In Proceedings of the International Symposium on High Performance Computer Architecture (HPCA) ."}],"event":{"name":"ASPLOS '20: Architectural Support for Programming Languages and Operating Systems","location":"Lausanne Switzerland","acronym":"ASPLOS '20","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378529","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3373376.3378529","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:38:16Z","timestamp":1750199896000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378529"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3,9]]},"references-count":53,"alternative-id":["10.1145\/3373376.3378529","10.1145\/3373376"],"URL":"https:\/\/doi.org\/10.1145\/3373376.3378529","relation":{},"subject":[],"published":{"date-parts":[[2020,3,9]]},"assertion":[{"value":"2020-03-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}