{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T21:15:21Z","timestamp":1772831721334,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T00:00:00Z","timestamp":1665187200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,8]]},"DOI":"10.1145\/3559009.3569649","type":"proceedings-article","created":{"date-parts":[[2023,1,27]],"date-time":"2023-01-27T14:02:50Z","timestamp":1674828170000},"page":"304-316","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Locality-Aware Optimizations for Improving Remote Memory Latency in Multi-GPU Systems"],"prefix":"10.1145","author":[{"given":"Leul","family":"Belayneh","sequence":"first","affiliation":[{"name":"University of Michigan"}]},{"given":"Haojie","family":"Ye","sequence":"additional","affiliation":[{"name":"University of Michigan"}]},{"given":"Kuan-Yu","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Michigan"}]},{"given":"David","family":"Blaauw","sequence":"additional","affiliation":[{"name":"University of Michigan"}]},{"given":"Trevor","family":"Mudge","sequence":"additional","affiliation":[{"name":"University of Michigan"}]},{"given":"Ronald","family":"Dreslinski","sequence":"additional","affiliation":[{"name":"University of Michigan"}]},{"given":"Nishil","family":"Talati","sequence":"additional","affiliation":[{"name":"University of Michigan"}]}],"member":"320","published-online":{"date-parts":[[2023,1,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AMD. 2015. AMD APP SDK OpenCL Optimization Guide.  AMD. 2015. AMD APP SDK OpenCL Optimization Guide."},{"key":"e_1_3_2_1_2_1","unstructured":"AMD. 2020. AMD CrossfireTM Technology. https:\/\/www.amd.com\/en\/technologies\/crossfire. last accessed on 11\/7\/2021.  AMD. 2020. AMD CrossfireTM Technology. https:\/\/www.amd.com\/en\/technologies\/crossfire. last accessed on 11\/7\/2021."},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. 2020. AMD Radeon\u2122 Pro V520 Graphics.  AMD. 2020. AMD Radeon\u2122 Pro V520 Graphics."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080231"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00055"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414639"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3399730"},{"key":"e_1_3_2_1_8_1","unstructured":"P. Bright. 2016. Moore's law really is dead this time. https:\/\/arstechnica.com\/%20information-technology\/2016\/02\/moores-law-really-is-dead-this-time\/. last accessed on 11\/7\/2021.  P. Bright. 2016. Moore's law really is dead this time. https:\/\/arstechnica.com\/%20information-technology\/2016\/02\/moores-law-really-is-dead-this-time\/. last accessed on 11\/7\/2021."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.16"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPDC.2014.29"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783738"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3001589"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586114"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.37"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783739"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2019.00028"},{"key":"e_1_3_2_1_18_1","volume-title":"Unison Cache: A Scalable and Effective Die-Stacked DRAM Cache. 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Jevdjic Djordje","year":"2014","unstructured":"Djordje Jevdjic , Gabriel H. Loh , Cansu Kaynak , and Babak Falsafi . 2014 . Unison Cache: A Scalable and Effective Die-Stacked DRAM Cache. 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture (2014), 25--37. Djordje Jevdjic, Gabriel H. Loh, Cansu Kaynak, and Babak Falsafi. 2014. Unison Cache: A Scalable and Effective Die-Stacked DRAM Cache. 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture (2014), 25--37."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485957"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.55"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378529"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI.2013.6654614"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3322127"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1147\/sj.71.0015"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124534"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480088"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00020"},{"key":"e_1_3_2_1_28_1","unstructured":"N. P. Jouppi N. Muralimanohar R. Balasubramonian&dagger;. 2009. CACTI 6.0: A Tool to Understand Large Caches. In HP laboratories.  N. P. Jouppi N. Muralimanohar R. Balasubramonian&dagger;. 2009. CACTI 6.0: A Tool to Understand Large Caches. In HP laboratories."},{"key":"e_1_3_2_1_29_1","unstructured":"NVIDIA. [n. d.]. NVLINK AND NVSWITCH The Building Blocks of Advanced Multi-GPU Communication. last accessed on 11\/7\/2021.  NVIDIA. [n. d.]. NVLINK AND NVSWITCH The Building Blocks of Advanced Multi-GPU Communication. last accessed on 11\/7\/2021."},{"key":"e_1_3_2_1_30_1","unstructured":"NVIDIA. 2020. NVIDIA DGX Systems. https:\/\/www.nvidia.com\/en-us\/data-center\/dgx-systems\/. last accessed on 11\/7\/2021.  NVIDIA. 2020. NVIDIA DGX Systems. https:\/\/www.nvidia.com\/en-us\/data-center\/dgx-systems\/. last accessed on 11\/7\/2021."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.30"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540717"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_34_1","volume-title":"Divergence-Aware Warp Scheduling. In 2013 46th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 99--110","author":"Rogers Timothy G.","unstructured":"Timothy G. Rogers , Mike O'Connor , and Tor M. Aamodt . 2013 . Divergence-Aware Warp Scheduling. In 2013 46th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 99--110 . Timothy G. Rogers, Mike O'Connor, and Tor M. Aamodt. 2013. Divergence-Aware Warp Scheduling. In 2013 46th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO). 99--110."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00025"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322230"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581262"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.54"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00074"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287624.3287633"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00036"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00035"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080243"},{"key":"e_1_3_2_1_44_1","unstructured":"Tomofumi Yuki and Louis-No\u00ebl Pouchet. 2015. Polybench 4.0.  Tomofumi Yuki and Louis-No\u00ebl Pouchet. 2015. Polybench 4.0."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2014.2359882"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2996190"}],"event":{"name":"PACT '22: International Conference on Parallel Architectures and Compilation Techniques","location":"Chicago Illinois","acronym":"PACT '22","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","IFIP WG 10.3 IFIP WG 10.3","IEEE CS"]},"container-title":["Proceedings of the International Conference on Parallel Architectures and Compilation Techniques"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3559009.3569649","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3559009.3569649","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:38Z","timestamp":1750186958000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3559009.3569649"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,8]]},"references-count":46,"alternative-id":["10.1145\/3559009.3569649","10.1145\/3559009"],"URL":"https:\/\/doi.org\/10.1145\/3559009.3569649","relation":{},"subject":[],"published":{"date-parts":[[2022,10,8]]},"assertion":[{"value":"2023-01-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}