{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T16:15:52Z","timestamp":1772727352267,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,9,30]],"date-time":"2020-09-30T00:00:00Z","timestamp":1601424000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NRF","award":["2015M3C4A7065647,2017R1A2B4011457"],"award-info":[{"award-number":["2015M3C4A7065647,2017R1A2B4011457"]}]},{"DOI":"10.13039\/100004311","name":"Advanced Micro Devices","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004311","id-type":"DOI","asserted-by":"publisher"}]},{"name":"MINECO","award":["TIN2016-78799-P"],"award-info":[{"award-number":["TIN2016-78799-P"]}]},{"name":"NSF CNS","award":["1525412,1525474"],"award-info":[{"award-number":["1525412,1525474"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,9,30]]},"DOI":"10.1145\/3410463.3414639","type":"proceedings-article","created":{"date-parts":[[2020,9,30]],"date-time":"2020-09-30T10:43:04Z","timestamp":1601462584000},"page":"455-466","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":23,"title":["Valkyrie"],"prefix":"10.1145","author":[{"given":"Trinayan","family":"Baruah","sequence":"first","affiliation":[{"name":"Northeastern University, Boston, MA, USA"}]},{"given":"Yifan","family":"Sun","sequence":"additional","affiliation":[{"name":"Northeastern University and William &amp; Mary, Williamsburg, VA, USA"}]},{"given":"Saiful A.","family":"Mojumder","sequence":"additional","affiliation":[{"name":"Boston University, Boston, MA, USA"}]},{"given":"Jos\u00e9 L.","family":"Abell\u00e1n","sequence":"additional","affiliation":[{"name":"Universidad Cat\u00f3lica San Antonio de Murcia, Murcia, Spain"}]},{"given":"Yash","family":"Ukidave","sequence":"additional","affiliation":[{"name":"Millennium USA, Boston, MA, USA"}]},{"given":"Ajay","family":"Joshi","sequence":"additional","affiliation":[{"name":"Boston University, Boston, MA, USA"}]},{"given":"Norman","family":"Rubin","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, MA, USA"}]},{"given":"John","family":"Kim","sequence":"additional","affiliation":[{"name":"KAIST, Daejeon, South Korea"}]},{"given":"David","family":"Kaeli","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, MA, USA"}]}],"member":"320","published-online":{"date-parts":[[2020,9,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056046"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.1996.501191"},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. 2009. AMD APU. (2009). http:\/\/developer.amd.com\/wordpress\/media\/2012\/10\/apu101.pdf  AMD. 2009. AMD APU. (2009). http:\/\/developer.amd.com\/wordpress\/media\/2012\/10\/apu101.pdf"},{"key":"e_1_3_2_1_4_1","unstructured":"AMD. 2015. AMD APP SDK OpenCL Optimization Guide.  AMD. 2015. AMD APP SDK OpenCL Optimization Guide."},{"key":"e_1_3_2_1_5_1","unstructured":"AMD. 2018. OpenCL Shared Virtual Memory. (2018). https:\/\/www.khronos.org\/registry\/OpenCL\/sdk\/2.1\/docs\/man\/xhtml\/sharedVirtualMemory.html  AMD. 2018. OpenCL Shared Virtual Memory. (2018). https:\/\/www.khronos.org\/registry\/OpenCL\/sdk\/2.1\/docs\/man\/xhtml\/sharedVirtualMemory.html"},{"key":"e_1_3_2_1_6_1","unstructured":"AMD. 2019. AMD Radeon VII. (2019). https:\/\/www.olcf.ornl.gov\/wp-content\/uploads\/2019\/10\/ORNL_Application_Readiness_Workshop-AMD_GPU_Basics.pdf  AMD. 2019. AMD Radeon VII. (2019). https:\/\/www.olcf.ornl.gov\/wp-content\/uploads\/2019\/10\/ORNL_Application_Readiness_Workshop-AMD_GPU_Basics.pdf"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2016.7936222"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123975"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3296957.3173169"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.50"},{"key":"e_1_3_2_1_11_1","volume-title":"Griffin: Hardware-Software Support for Efficient Page Migration in Multi-GPU Systems. In 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 596--609","author":"Baruah Trinayan","year":"2020","unstructured":"Trinayan Baruah , Yifan Sun , Ali Tolga Dincc er, Saiful A Mojumder , Jos\u00e9 L Abell\u00e1n , Yash Ukidave , Ajay Joshi , Norman Rubin , John Kim , and David Kaeli . 2020 . Griffin: Hardware-Software Support for Efficient Page Migration in Multi-GPU Systems. In 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 596--609 . Trinayan Baruah, Yifan Sun, Ali Tolga Dincc er, Saiful A Mojumder, Jos\u00e9 L Abell\u00e1n, Yash Ukidave, Ajay Joshi, Norman Rubin, John Kim, and David Kaeli. 2020. Griffin: Hardware-Software Support for Efficient Page Migration in Multi-GPU Systems. In 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, 596--609."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2004.21"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.3711640"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749717"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1735971.1736060"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3001589"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322224"},{"key":"e_1_3_2_1_19_1","unstructured":"Fabien Gaud Baptiste Lepers Jeremie Decouchant Justin Funston Alexandra Fedorova and Vivien Qu\u00e9ma. 2014. Large Pages May Be Harmful on {NUMA} Systems. In 2014 {USENIX} Annual Technical Conference ({SENIX}{ATC} 14). 231--242.  Fabien Gaud Baptiste Lepers Jeremie Decouchant Justin Funston Alexandra Fedorova and Vivien Qu\u00e9ma. 2014. Large Pages May Be Harmful on {NUMA} Systems. In 2014 {USENIX} Annual Technical Conference ({SENIX}{ATC} 14). 231--242."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2015.02.028"},{"key":"e_1_3_2_1_21_1","volume-title":"Analyzing and Leveraging Remote-core Bandwidth for Enhanced Performance in GPUs. In 2019 28th International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE, 258--271","author":"Ibrahim Mohamed Assem","year":"2019","unstructured":"Mohamed Assem Ibrahim , Hongyuan Liu , Onur Kayiran , and Adwait Jog . 2019 . Analyzing and Leveraging Remote-core Bandwidth for Enhanced Performance in GPUs. In 2019 28th International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE, 258--271 . Mohamed Assem Ibrahim, Hongyuan Liu, Onur Kayiran, and Adwait Jog. 2019. Analyzing and Leveraging Remote-core Bandwidth for Enhanced Performance in GPUs. In 2019 28th International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE, 258--271."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3309710"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/NOCS.2009.5071460"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/325096.325162"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2002.1003578"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2018.00052"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2013.07.014"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304044"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/11758549_29"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358294"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2007.370271"},{"key":"e_1_3_2_1_33_1","volume-title":"A tool to model large caches. HP laboratories","author":"Muralimanohar Naveen","year":"2009","unstructured":"Naveen Muralimanohar , Rajeev Balasubramonian , and Norman P Jouppi . 2009. CACTI 6.0 : A tool to model large caches. HP laboratories , Vol. 27 ( 2009 ), 28. Naveen Muralimanohar, Rajeev Balasubramonian, and Norman P Jouppi. 2009. CACTI 6.0: A tool to model large caches. HP laboratories, Vol. 27 (2009), 28."},{"key":"e_1_3_2_1_34_1","unstructured":"NVIDIA. 2018. NVIDIA Unified Memory. (2018). http:\/\/on-demand.gputechconf.com\/gtc\/2018\/presentation\/s8430-everything-you-need-to-know-about-unified-memory.pdf  NVIDIA. 2018. NVIDIA Unified Memory. (2018). http:\/\/on-demand.gputechconf.com\/gtc\/2018\/presentation\/s8430-everything-you-need-to-know-about-unified-memory.pdf"},{"key":"e_1_3_2_1_35_1","volume-title":"https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/design-visualization\/technologies\/turing-architecture\/NVIDIA-Turing-Architecture-Whitepaper.pdf","author":"NVIDIA.","year":"2019","unstructured":"NVIDIA. 2019. Turing GPU. ( 2019 ). https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/design-visualization\/technologies\/turing-architecture\/NVIDIA-Turing-Architecture-Whitepaper.pdf NVIDIA. 2019. Turing GPU. (2019). https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/design-visualization\/technologies\/turing-architecture\/NVIDIA-Turing-Architecture-Whitepaper.pdf"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.32"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2654822.2541942"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835965"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/339647.339666"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/1360612.1360617"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00025"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00036"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522351"},{"key":"e_1_3_2_1_44_1","volume-title":"23rd Annual International Symposium on Computer Architecture (ISCA'96)","author":"Sohi GS","year":"1996","unstructured":"GS Sohi and TM Austin . 1996 . High-bandwidth address translation for multiple-issue processors . In 23rd Annual International Symposium on Computer Architecture (ISCA'96) . IEEE, 158--158. GS Sohi and TM Austin. 1996. High-bandwidth address translation for multiple-issue processors. In 23rd Annual International Symposium on Computer Architecture (ISCA'96). IEEE, 158--158."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322230"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581262"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.5555\/2555729.2555745"},{"key":"e_1_3_2_1_48_1","volume-title":"https:\/\/www.techpowerup.com\/gpu-specs\/radeon-r9-nano.c2735","author":"Powerup Tech","year":"2015","unstructured":"Tech Powerup . 2015. AMD R9 Nano . ( 2015 ). https:\/\/www.techpowerup.com\/gpu-specs\/radeon-r9-nano.c2735 Tech Powerup. 2015. AMD R9Nano. (2015). https:\/\/www.techpowerup.com\/gpu-specs\/radeon-r9-nano.c2735"},{"key":"e_1_3_2_1_49_1","first-page":"313","article-title":"Multi-level cache architecture having a selective victim cache","volume":"11","author":"Vanderwiel Steven","year":"2007","unstructured":"Steven Vanderwiel . 2007 . Multi-level cache architecture having a selective victim cache . US Patent App. 11\/259 , 313 . Steven Vanderwiel. 2007. Multi-level cache architecture having a selective victim cache. US Patent App. 11\/259,313.","journal-title":"US Patent App."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2016.7482091"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080211"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173195"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00035"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446077"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2786572.2786596"}],"event":{"name":"PACT '20: International Conference on Parallel Architectures and Compilation Techniques","location":"Virtual Event GA USA","acronym":"PACT '20","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3410463.3414639","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3410463.3414639","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:51Z","timestamp":1750195911000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3410463.3414639"}},"subtitle":["Leveraging Inter-TLB Locality to Enhance GPU Performance"],"short-title":[],"issued":{"date-parts":[[2020,9,30]]},"references-count":55,"alternative-id":["10.1145\/3410463.3414639","10.1145\/3410463"],"URL":"https:\/\/doi.org\/10.1145\/3410463.3414639","relation":{},"subject":[],"published":{"date-parts":[[2020,9,30]]},"assertion":[{"value":"2020-09-30","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}