{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T15:33:48Z","timestamp":1772724828484,"version":"3.50.1"},"reference-count":64,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,2]]},"DOI":"10.1109\/hpca.2013.6522351","type":"proceedings-article","created":{"date-parts":[[2013,6,8]],"date-time":"2013-06-08T15:12:52Z","timestamp":1370704372000},"page":"578-590","source":"Crossref","is-referenced-by-count":114,"title":["Cache coherence for GPU architectures"],"prefix":"10.1109","author":[{"given":"I.","family":"Singh","sequence":"first","affiliation":[]},{"given":"A.","family":"Shriraman","sequence":"additional","affiliation":[]},{"given":"W. W. L.","family":"Fung","sequence":"additional","affiliation":[]},{"given":"M.","family":"O'Connor","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Aamodt","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"35","article-title":"The java memory model","author":"manson","year":"2005","journal-title":"POPL"},{"key":"36","doi-asserted-by":"publisher","DOI":"10.1145\/859618.859640"},{"key":"33","doi-asserted-by":"crossref","first-page":"48","DOI":"10.1109\/ISCA.1995.524548","article-title":"Dynamic self-invalidation: reducing coherence overhead in shared-memory multiprocessors","author":"lebeck","year":"1995","journal-title":"Proceedings 22nd Annual International Symposium on Computer Architecture ISCA"},{"key":"34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2011.6081367"},{"key":"39","doi-asserted-by":"publisher","DOI":"10.1145\/2209249.2209269"},{"key":"37","doi-asserted-by":"publisher","DOI":"10.1145\/378993.378998"},{"key":"38","doi-asserted-by":"crossref","DOI":"10.1145\/1105734.1105747","article-title":"Multifacet's general execution-driven multiprocessor simulator (GEMS) toolset","volume":"33","author":"martin","year":"2005","journal-title":"SIGARCH Comput Archit News"},{"key":"43","article-title":"An incessantly coherent cache scheme for shared memory multithreaded systems","author":"nandy","year":"1994","journal-title":"IWPP"},{"key":"42","doi-asserted-by":"publisher","DOI":"10.1109\/71.113080"},{"key":"41","doi-asserted-by":"publisher","DOI":"10.1145\/2145816.2145832"},{"key":"40","doi-asserted-by":"crossref","DOI":"10.1145\/2145816.2145831","article-title":"A GPU implementation of inclusionbased points-to analysis","author":"mendez-lojo","year":"2012","journal-title":"PPoPP"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1145\/1941553.1941590"},{"key":"23","year":"2012","journal-title":"Intel 64 and IA-32 Architectures Software Developers Manual"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1145\/165123.165154"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1109\/DATE.2009.5090700"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1816019"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1145\/1854273.1854291"},{"key":"29","year":"0"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1109\/2.546611"},{"key":"2","year":"1994","journal-title":"The PowerPC Architecture A Specification for a New Family of RISC Processors"},{"key":"1","year":"0"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1988.5212"},{"key":"30","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2005.35"},{"key":"6","year":"2012","journal-title":"AMD Accelerated Parallel Processing OpenCL Pro-gramming Guide"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919636"},{"key":"32","doi-asserted-by":"publisher","DOI":"10.1147\/rd.516.0639"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1990.134502"},{"key":"31","doi-asserted-by":"crossref","DOI":"10.1145\/264107.264206","article-title":"The SGI origin: A ccNUMA highly scalable server","author":"laudon","year":"1997","journal-title":"ISCA"},{"key":"9","article-title":"The gem5 simulator. SIGARCH Comput","volume":"39","author":"binkert","year":"2011","journal-title":"Architecture News"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"59","year":"1994","journal-title":"SPARC Architecture Manual (Version 9)"},{"key":"58","doi-asserted-by":"publisher","DOI":"10.2200\/S00346ED1V01Y201104CAC016"},{"key":"57","author":"sites","year":"1992","journal-title":"Alpha Architecture Reference Manual"},{"key":"56","author":"singh","year":"2013","journal-title":"Temporal Coherence Hardware Cache Coherence for GPU Architectures"},{"key":"19","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1990.134503"},{"key":"55","article-title":"Inter-block gpu communication via fast barrier synchronization","author":"xiao","year":"2010","journal-title":"IPDPS"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2007.43"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2009.62"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.21"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-384988-5.00006-1"},{"key":"14","article-title":"On dynamic load balancing on graphics processors","author":"cederman","year":"2008","journal-title":"EUROGRAPHICS"},{"key":"11","author":"brookwood","year":"2010","journal-title":"Amd Fusion Family of Apus Enabling A Superior Immersive Pc Experience"},{"key":"12","author":"brownsword","year":"2009","journal-title":"Cloth in OpenCL"},{"key":"21","article-title":"Characterizing and evaluating a key-value store application on heterogeneous CPU-GPU Systems","author":"hetherington","year":"2012","journal-title":"ISPASS"},{"key":"20","doi-asserted-by":"crossref","DOI":"10.1145\/1345206.1345215","article-title":"FastForward for efficient pipeline parallelism: A cache-optimized concurrent lock-free queue","author":"giacomoni","year":"2008","journal-title":"PPoPP"},{"key":"64","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.10"},{"key":"62","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2010.5452013"},{"key":"63","article-title":"A Timestamp-based selective invalidation scheme formultiprocessor cache coherence","volume":"3","author":"yuan","year":"1996","journal-title":"ICPP"},{"key":"60","year":"2007","journal-title":"OpenSPARC T2 Core Microarchitecture Specification"},{"key":"61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2008.4563095"},{"key":"49","doi-asserted-by":"publisher","DOI":"10.1145\/2145694.2145708"},{"key":"48","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370853"},{"key":"45","year":"2012","journal-title":"NVIDIA's Next Generation CUDA Compute Ar-chitecture Kepler GK110"},{"key":"44","year":"2009","journal-title":"NVIDIA's Next Generation CUDA Compute Ar-chitecture Fermi"},{"key":"47","doi-asserted-by":"publisher","DOI":"10.1145\/248621.248624"},{"key":"46","year":"2012","journal-title":"CUDA C Programming Guide v4 2"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375591"},{"key":"51","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2012.6168950"},{"key":"52","author":"schoellkopf","year":"2008","journal-title":"SRAM Memory Device with Flash Clear and Corresponding Flash Clear Method"},{"key":"53","author":"seal","year":"2000","journal-title":"ARM Architecture Reference Manual"},{"key":"54","author":"shim","year":"2011","journal-title":"Library Cache Coherence"},{"key":"50","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2009.2034076"}],"event":{"name":"2013 IEEE 19th International Symposium on High Performance Computer Architecture (HPCA)","location":"Shenzhen","start":{"date-parts":[[2013,2,23]]},"end":{"date-parts":[[2013,2,27]]}},"container-title":["2013 IEEE 19th International Symposium on High Performance Computer Architecture (HPCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6518038\/6522298\/06522351.pdf?arnumber=6522351","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,12]],"date-time":"2024-05-12T02:22:11Z","timestamp":1715480531000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6522351\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,2]]},"references-count":64,"URL":"https:\/\/doi.org\/10.1109\/hpca.2013.6522351","relation":{},"subject":[],"published":{"date-parts":[[2013,2]]}}}