{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T14:29:59Z","timestamp":1780496999667,"version":"3.54.1"},"reference-count":44,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015,2]]},"DOI":"10.1109\/hpca.2015.7056046","type":"proceedings-article","created":{"date-parts":[[2015,3,10]],"date-time":"2015-03-10T22:13:51Z","timestamp":1426025631000},"page":"354-365","source":"Crossref","is-referenced-by-count":59,"title":["Unlocking bandwidth for GPUs in CC-NUMA systems"],"prefix":"10.1109","author":[{"given":"Neha","family":"Agarwal","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"David","family":"Nellans","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mike","family":"O'Connor","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Stephen W.","family":"Keckler","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Thomas F.","family":"Wenisch","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Optimizing CoMD: A Molecular Dynamics Proxy Application Study","author":"mohd-yusof","year":"2014","journal-title":"GPU Technology Conference (GTC)"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485943"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835964"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1998.694775"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835965"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"ref36","year":"2014","journal-title":"Intel Xeon Processor E7-4870"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2008.4510742"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2600212.2600231"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1995896.1995911"},{"key":"ref40","first-page":"196","article-title":"Software Design Space Exploration for Exascale Combustion Co-design","author":"chan","year":"2013","journal-title":"International Supercomputing Conference (ISC)"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/582034.582067"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/74850.74854"},{"key":"ref13","first-page":"1","article-title":"On the Importance of Parallel Application Placement in NUMA Multiprocessors","author":"brecht","year":"1993","journal-title":"Symposium on Experiences with Distributed and Multiprocessor Systems IV"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/71.180624"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/237090.237205"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/S1383-7621(02)00066-8"},{"key":"ref17","author":"corbet","year":"2012","journal-title":"Autonuma The Other Approach to Numa Scheduling"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451157"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1272996.1273004"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541942"},{"key":"ref4","year":"2010","journal-title":"The HyperTransport 3 1 specification"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2012.2"},{"key":"ref3","year":"2014","journal-title":"NVIDIA Launches World's First High-Speed GPU Interconnect Helping Pave the Way to Exascale Computing"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2013.6557176"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.65"},{"key":"ref5","year":"2009","journal-title":"An Introduction to the Intel QuickPath Interconnect"},{"key":"ref8","first-page":"14","article-title":"Operating System Support for NVM+DRAM Hybrid Main Memory","author":"mogul","year":"2009","journal-title":"Workshop on Hot Topics in Operating Systems (HotOS)"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/DATE.2011.5763155"},{"key":"ref2","year":"2014","journal-title":"What is Heterogeneous System Architecture (Hsa)?"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IGCC.2011.6008569"},{"key":"ref1","year":"2014","journal-title":"Compute Unified Device Architecture"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/1736020.1736036"},{"key":"ref22","first-page":"1","article-title":"A Case for NUMA-aware Contention Management on Multicore Systems","author":"blagodurov","year":"2011","journal-title":"Proceedings of the USENIX Annual Technical Conference (USENIX"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2008.48"},{"key":"ref42","article-title":"XSBench - The Development and Verification of a Performance Abstraction for Monte Carlo Reactor Analysis","author":"tramm","year":"2014","journal-title":"The Role of Reactor Physics toward a Sustainable Future (PHYSOR)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2541228.2541231"},{"key":"ref41","article-title":"Improving Performance via Mini-applications","author":"heroux","year":"2009","journal-title":"Tech Rep"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1854273.1854314"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694381"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2010.100"},{"key":"ref43","year":"0","journal-title":"Stream Sustainable Memory Bandwidth in High Performance Computers"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2013.6618807"}],"event":{"name":"2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA)","location":"Burlingame, CA, USA","start":{"date-parts":[[2015,2,7]]},"end":{"date-parts":[[2015,2,11]]}},"container-title":["2015 IEEE 21st International Symposium on High Performance Computer Architecture (HPCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7048058\/7056013\/07056046.pdf?arnumber=7056046","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,24]],"date-time":"2017-03-24T18:42:16Z","timestamp":1490380936000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7056046\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,2]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/hpca.2015.7056046","relation":{},"subject":[],"published":{"date-parts":[[2015,2]]}}}