{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T03:33:02Z","timestamp":1752982382990},"reference-count":37,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,10]]},"DOI":"10.1109\/pact.2013.6618813","type":"proceedings-article","created":{"date-parts":[[2013,10,10]],"date-time":"2013-10-10T23:35:09Z","timestamp":1381448109000},"source":"Crossref","is-referenced-by-count":19,"title":["Reshaping cache misses to improve row-buffer locality in multicore systems"],"prefix":"10.1109","author":[{"given":"Onur","family":"Kayiran","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adwait","family":"Jog","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mahmut T.","family":"Kandemir","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chita R.","family":"Das","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451158"},{"key":"35","year":"0"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304582"},{"key":"36","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2010.5452013"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485951"},{"key":"33","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555775"},{"key":"34","author":"stratton","year":"2012","journal-title":"Scheduler-Based Prefetching for Multilevel Memories"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2012.6168944"},{"key":"13","article-title":"AMD's fusion finally arrives","author":"halfhill","year":"2010","journal-title":"Microprocessor Report"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454152"},{"key":"37","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669119"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.30"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000093"},{"key":"21","author":"kirk","year":"2010","journal-title":"Programming Massively Parallel Processors"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"22","article-title":"NVIDIA lowers the heat on kepler","author":"krewell","year":"2012","journal-title":"Microprocessor Report"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2011.32"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.44"},{"key":"25","doi-asserted-by":"crossref","DOI":"10.1145\/1504176.1504194","article-title":"Openmp to GPGPU: A compiler framework for automatic translation and optimization","author":"lee","year":"2009","journal-title":"PPoPP"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2008.31"},{"key":"27","author":"munshi","year":"2011","journal-title":"The OpenCL Specification"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"29","author":"nvidia","year":"2010","journal-title":"CUDA C Programming Guide"},{"key":"3","article-title":"Throughput-effective on-chip networks for manycore accelerators","author":"bakhoda","year":"2010","journal-title":"Micro"},{"key":"2","article-title":"Staged memory scheduling: Achieving high prformance and scalability in heterogeneous systems","author":"ausavarungnirun","year":"2012","journal-title":"ISCA"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749714"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2012.6168946"},{"key":"30","year":"2011","journal-title":"CUDA C\/C++ SDK CODE Samples"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2011.141"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"32","doi-asserted-by":"publisher","DOI":"10.1145\/2366231.2337167"},{"key":"5","doi-asserted-by":"crossref","DOI":"10.1145\/2380403.2380431","article-title":"When less is more (LIMO):Controlled parallelism for improved efficiency","author":"chadha","year":"2012","journal-title":"CASES"},{"key":"31","year":"2011","journal-title":"Fermi NVIDIA's next Generation CUDA Compute Architecture"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155655"},{"key":"8","article-title":"GPU Cluster for high performance computing","author":"fan","year":"2004","journal-title":"SC"}],"event":{"name":"22nd International Conference on Parallel Architectures and Compilation Techniques (PACT)","location":"Edinburgh","start":{"date-parts":[[2013,9,7]]},"end":{"date-parts":[[2013,9,11]]}},"container-title":["Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6603429\/6618788\/06618813.pdf?arnumber=6618813","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,5]],"date-time":"2023-07-05T07:11:39Z","timestamp":1688541099000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6618813\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,10]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/pact.2013.6618813","relation":{},"subject":[],"published":{"date-parts":[[2013,10]]}}}