{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T10:19:36Z","timestamp":1758709176218},"reference-count":70,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,3]]},"DOI":"10.1109\/hpca.2016.7446089","type":"proceedings-article","created":{"date-parts":[[2016,4,4]],"date-time":"2016-04-04T22:03:56Z","timestamp":1459807436000},"page":"494-506","source":"Crossref","is-referenced-by-count":37,"title":["Selective GPU caches to eliminate CPU-GPU HW cache coherence"],"prefix":"10.1109","author":[{"given":"Neha","family":"Agarwal","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Nellans","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eiman","family":"Ebrahimi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas F.","family":"Wenisch","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John","family":"Danskin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stephen W.","family":"Keckler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref70","DOI":"10.1109\/PACT.2011.10"},{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1109\/ISCA.1995.524548"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1145\/339647.339669"},{"doi-asserted-by":"publisher","key":"ref33","DOI":"10.1109\/MM.2011.40"},{"year":"2015","author":"huh","article-title":"Intel Xeon Processor E5 v3 Family","key":"ref32"},{"year":"2009","journal-title":"Intel Corporation","article-title":"An Introduction to the Intel QuickPath Interconnect","key":"ref31"},{"year":"2015","journal-title":"IBM Corporation","article-title":"POWER8 Coherent Accelerator Processor Interface (CAPI)","key":"ref30"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1145\/2872887.2750421"},{"key":"ref36","first-page":"99","article-title":"WAY-POINT: Scaling Coherence to Thousand-core Architectures","author":"keim","year":"2010","journal-title":"International Conference on Parallel Architectures and Compilation Techniques (PACT)"},{"key":"ref35","first-page":"140","article-title":"Rigel: An Architecture and Scalable Programming Interface for a 1000-core Accelerator","author":"keim","year":"2009","journal-title":"International Symposium on Computer Architecture (ISCA)"},{"doi-asserted-by":"publisher","key":"ref34","DOI":"10.1145\/2485922.2485968"},{"doi-asserted-by":"publisher","key":"ref60","DOI":"10.1109\/HPCA.2013.6522351"},{"key":"ref62","article-title":"Parboil: A Revised Benchmark Suite for Scientific and Commercial Throughput Computing","author":"stratton","year":"2012","journal-title":"IMPACT Technical Report IMPACT-12-01 University of Illinois at Urbana-Champaign Tech Rep"},{"doi-asserted-by":"publisher","key":"ref61","DOI":"10.1109\/2.55497"},{"doi-asserted-by":"publisher","key":"ref63","DOI":"10.1109\/ISCA.2006.21"},{"key":"ref28","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1145\/1024393.1024406","article-title":"Coherence Decoupling: Making Use of Incoherence","author":"huh","year":"2004","journal-title":"International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)"},{"doi-asserted-by":"publisher","key":"ref64","DOI":"10.1109\/DATE.2004.1269047"},{"doi-asserted-by":"publisher","key":"ref27","DOI":"10.1109\/JSSC.2013.2258815"},{"doi-asserted-by":"publisher","key":"ref65","DOI":"10.1145\/2694344.2694356"},{"doi-asserted-by":"publisher","key":"ref66","DOI":"10.1145\/2451116.2451119"},{"year":"2010","journal-title":"HyperTransport consortium","article-title":"HyperTransport 3.1 Specification","key":"ref29"},{"key":"ref67","article-title":"Scaling the Power Wall: A Path to Exas-cale","author":"villa","year":"2014","journal-title":"International Conference on High Performance Networking and Computing (Supercomputing)"},{"doi-asserted-by":"publisher","key":"ref68","DOI":"10.1109\/MICRO.2007.14"},{"key":"ref69","first-page":"423","article-title":"A Tag-less Coherence Directory","author":"zebchuk","year":"2009","journal-title":"International Symposium on Microarchitecture (MICRO)"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1145\/2694344.2694381"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/HPCA.2015.7056046"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1145\/1736020.1736059"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/HPCA.2014.6835930"},{"doi-asserted-by":"publisher","key":"ref21","DOI":"10.1145\/1555754.1555779"},{"key":"ref24","first-page":"513","article-title":"A Case of System-level Hardware\/Software Codesign and Co-verification of a Commodity Multi-processor System with Custom Hardware","author":"hong","year":"2012","journal-title":"Int'l Conference on Hard-ware\/Software Codesign and System Synthesis (CODES+ISSS)"},{"doi-asserted-by":"publisher","key":"ref23","DOI":"10.1145\/161541.161544"},{"year":"2014","journal-title":"The HSA Foundation","article-title":"HSA Platform System Architecture Specification - Provisional 1.0","key":"ref26"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1145\/2541940.2541981"},{"doi-asserted-by":"publisher","key":"ref50","DOI":"10.1145\/2485922.2485935"},{"doi-asserted-by":"publisher","key":"ref51","DOI":"10.1007\/978-3-662-44491-7_1"},{"doi-asserted-by":"publisher","key":"ref59","DOI":"10.1145\/2830772.2830821"},{"doi-asserted-by":"publisher","key":"ref58","DOI":"10.1109\/HPCA.2012.6168950"},{"doi-asserted-by":"publisher","key":"ref57","DOI":"10.1145\/2370816.2370853"},{"doi-asserted-by":"publisher","key":"ref56","DOI":"10.1109\/ISCA.2014.6853195"},{"doi-asserted-by":"publisher","key":"ref55","DOI":"10.1145\/1854273.1854331"},{"doi-asserted-by":"publisher","key":"ref54","DOI":"10.1145\/2540708.2540747"},{"doi-asserted-by":"publisher","key":"ref53","DOI":"10.1109\/HPCA.2014.6835965"},{"doi-asserted-by":"publisher","key":"ref52","DOI":"10.1145\/2541940.2541942"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/CICC.1992.591865"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/ISCA.2006.23"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1109\/HPCA.2012.6168955"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/PACT.2011.21"},{"key":"ref13","article-title":"BATMAN: Maximizing Bandwidth Utilization of Hybrid Memory Systems","author":"chou","year":"2015","journal-title":"Georgia Institute of Technology Tech Rep TR-CARET-2015-01"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1145\/2000064.2000076"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/SAAHPC.2011.29"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1145\/2451116.2451157"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/SUPERC.1994.344301"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1145\/2674005.2674994"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1145\/2701618"},{"year":"2014","journal-title":"AMD Corporation","article-title":"Compute Cores","key":"ref4"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/MICRO.2012.39"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1145\/362686.362692"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/ISPASS.2009.4919648"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/ISCA.2005.31"},{"doi-asserted-by":"publisher","key":"ref49","DOI":"10.1016\/j.jalgor.2003.12.002"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1007\/11841036_61"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/IISWC.2009.5306797"},{"year":"2014","journal-title":"NVIDIA Corporation","article-title":"NVIDIA Launches World's First High-Speed GPU Interconnect, Helping Pave the Way to Exascale Computing","key":"ref46"},{"year":"2013","journal-title":"NVIDIA Corporation","article-title":"Unified Memory in CUDA 6","key":"ref45"},{"year":"2015","journal-title":"NVIDIA Corporation","article-title":"New NVIDIA TITAN X GPU Powers Virtual Experience &#x201C;Thief in the Shadows&#x201D; at GDC","key":"ref48"},{"year":"2015","journal-title":"NVIDIA Corporation","article-title":"CUDA C Programming Guild v7.0","key":"ref47"},{"doi-asserted-by":"publisher","key":"ref42","DOI":"10.1109\/HPCA.2015.7056027"},{"doi-asserted-by":"publisher","key":"ref41","DOI":"10.1145\/2209249.2209269"},{"doi-asserted-by":"publisher","key":"ref44","DOI":"10.1109\/HPCA.2001.903254"},{"doi-asserted-by":"publisher","key":"ref43","DOI":"10.1109\/ISCA.2005.42"}],"event":{"name":"2016 IEEE International Symposium on High Performance Computer Architecture (HPCA)","start":{"date-parts":[[2016,3,12]]},"location":"Barcelona, Spain","end":{"date-parts":[[2016,3,16]]}},"container-title":["2016 IEEE International Symposium on High Performance Computer Architecture (HPCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7440961\/7446041\/7446089.pdf?arnumber=7446089","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,17]],"date-time":"2022-06-17T01:48:57Z","timestamp":1655430537000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7446089\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,3]]},"references-count":70,"URL":"https:\/\/doi.org\/10.1109\/hpca.2016.7446089","relation":{},"subject":[],"published":{"date-parts":[[2016,3]]}}}