{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T16:28:55Z","timestamp":1781195335198,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":84,"publisher":"ACM","license":[{"start":{"date-parts":[[2015,6,13]],"date-time":"2015-06-13T00:00:00Z","timestamp":1434153600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2015,6,13]]},"DOI":"10.1145\/2749469.2750399","type":"proceedings-article","created":{"date-parts":[[2015,5,26]],"date-time":"2015-05-26T10:36:25Z","timestamp":1432636585000},"page":"41-53","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":73,"title":["A case for core-assisted bottleneck acceleration in GPUs"],"prefix":"10.1145","author":[{"given":"Nandita","family":"Vijaykumar","sequence":"first","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gennady","family":"Pekhimenko","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Adwait","family":"Jog","sequence":"additional","affiliation":[{"name":"Pennsylvania State University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Abhishek","family":"Bhowmick","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rachata","family":"Ausavarungnirun","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chita","family":"Das","sequence":"additional","affiliation":[{"name":"Pennsylvania State University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mahmut","family":"Kandemir","sequence":"additional","affiliation":[{"name":"Pennsylvania State University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Todd C.","family":"Mowry","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Onur","family":"Mutlu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2015,6,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2004.10028"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.452.0287"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522337"},{"key":"e_1_3_2_1_4_1","volume-title":"ISCA","author":"Alameldeen A.","year":"2004","unstructured":"A. Alameldeen Adaptive Cache Compression for High-Performance Processors . In ISCA , 2004 . A. Alameldeen et al. Adaptive Cache Compression for High-Performance Processors. In ISCA, 2004."},{"key":"e_1_3_2_1_5_1","volume-title":"Wisconsin","author":"Alameldeen A.","year":"2004","unstructured":"A. Alameldeen Frequent Pattern Compression: A Significance-Based Compression Scheme for L2 Caches. Technical report, U . Wisconsin , 2004 . A. Alameldeen et al. Frequent Pattern Compression: A Significance-Based Compression Scheme for L2 Caches. Technical report, U. Wisconsin, 2004."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2007.346200"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/377792.377835"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2005.119"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2011.79"},{"key":"e_1_3_2_1_10_1","volume-title":"ISCA","author":"Arelakis A.","year":"2014","unstructured":"A. Arelakis : A Statistical Compression Cache Scheme . In ISCA , 2014 . A. Arelakis et al. SC2: A Statistical Compression Cache Scheme. In ISCA, 2014."},{"key":"e_1_3_2_1_11_1","volume-title":"ISCA","author":"Arnau J.","year":"2012","unstructured":"J. Arnau Boosting mobile GPU performance with a decoupled access\/execute fragment processor . In ISCA , 2012 . J. Arnau et al. Boosting mobile GPU performance with a decoupled access\/execute fragment processor. In ISCA, 2012."},{"key":"e_1_3_2_1_12_1","volume-title":"ISCA","author":"Arnau J.","year":"2014","unstructured":"J. Arnau Eliminating Redundant Fragment Shader Executions on a Mobile GPU via Hardware Memoization . In ISCA , 2014 . J. Arnau et al. Eliminating Redundant Fragment Shader Executions on a Mobile GPU via Hardware Memoization. In ISCA, 2014."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/12.381947"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063400"},{"key":"e_1_3_2_1_16_1","volume-title":"MTEAC","author":"Brown J. A.","year":"2001","unstructured":"J. A. Brown Speculative precomputation on chip multiprocessors . In MTEAC , 2001 . J. A. Brown et al. Speculative precomputation on chip multiprocessors. In MTEAC, 2001."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2012.6402918"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2008.2010150"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/300979.300995"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/774861.774870"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2009.2020989"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/291069.291056"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/563998.564037"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/379240.379248"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/320080.320104"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/977091.977116"},{"key":"e_1_3_2_1_28_1","volume-title":"USC","author":"Dubois M.","year":"1998","unstructured":"M. Dubois Assisted execution. Technical report , USC , 1998 . M. Dubois et al. Assisted execution. Technical report, USC, 1998."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1542275.1542288"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669154"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2009.4798232"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000081"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2005.6"},{"key":"e_1_3_2_1_34_1","volume-title":"MICRO","author":"J. W.","year":"1992","unstructured":"J. W. C. Fu et al. Stride directed prefetching in scalar processors . In MICRO , 1992 . J. W. C. Fu et al. Stride directed prefetching in scalar processors. In MICRO, 1992."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155675"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000093"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.18"},{"key":"e_1_3_2_1_38_1","unstructured":"GPGPU-Sim v3.2.1. GPGPU-Sim Manual.  GPGPU-Sim v3.2.1. GPGPU-Sim Manual."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454152"},{"key":"e_1_3_2_1_40_1","volume-title":"HPCA","author":"Huang J.","year":"1999","unstructured":"J. Huang Exploiting basic block value locality with block reuse . In HPCA , 1999 . J. Huang et al. Exploiting basic block value locality with block reuse. In HPCA, 1999."},{"key":"e_1_3_2_1_41_1","unstructured":"Hynix. Hynix GDDR5 SGRAM Part H5GQ1H24AFR Revision 1.0.  Hynix. Hynix GDDR5 SGRAM Part H5GQ1H24AFR Revision 1.0."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/822080.822798"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2009.29"},{"key":"e_1_3_2_1_44_1","unstructured":"ITRS. International technology roadmap for semiconductors. 2011.  ITRS. International technology roadmap for semiconductors. 2011."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485951"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451158"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/325164.325162"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1950365.1950411"},{"key":"e_1_3_2_1_49_1","volume-title":"PACT","author":"Kayiran O.","year":"2013","unstructured":"O. Kayiran Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs . In PACT , 2013 . O. Kayiran et al. Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs. In PACT, 2013."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/605397.605415"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835970"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.44"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485964"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2008.31"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2005.18"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/379240.379250"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815992"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/377792.377856"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"e_1_3_2_1_61_1","volume-title":"Apparatus, system, and method for coalescing parallel memory requests","author":"Nordquist B. S.","year":"2009","unstructured":"B. S. Nordquist Apparatus, system, and method for coalescing parallel memory requests , 2009 . US Patent 7,492,368. B. S. Nordquist et al. Apparatus, system, and method for coalescing parallel memory requests, 2009. US Patent 7,492,368."},{"key":"e_1_3_2_1_62_1","unstructured":"NVIDIA. CUDA C\/C++ SDK Code Samples 2011.  NVIDIA. CUDA C\/C++ SDK Code Samples 2011."},{"key":"e_1_3_2_1_63_1","volume-title":"Systems and methods for coalescing memory accesses of parallel threads","author":"Nyland L.","year":"2011","unstructured":"L. Nyland Systems and methods for coalescing memory accesses of parallel threads , 2011 . US Patent 8,086,806. L. Nyland et al. Systems and methods for coalescing memory accesses of parallel threads, 2011. US Patent 8,086,806."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/191995.192014"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370870"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540724"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056021"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.5555\/580550.876429"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540711"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540715"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370864"},{"key":"e_1_3_2_1_73_1","volume-title":"PACT","author":"Sethia A.","year":"2013","unstructured":"A. Sethia : adaptive prefetching on gpus for energy efficiency . In PACT , 2013 . A. Sethia et al. Apogee: adaptive prefetching on gpus for energy efficiency. In PACT, 2013."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.16"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835972"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.5555\/17956.17961"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/264107.264200"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2007.346185"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/378993.379247"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/MAHC.1980.10044"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2008.28"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1145\/360128.360154"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2007.346187"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/379240.379246"}],"event":{"name":"ISCA '15: The 42nd Annual International Symposium on Computer Architecture","location":"Portland Oregon","acronym":"ISCA '15","sponsor":["IEEE TCCA IEEE Computer Society Technical Committee on Computer Architecture","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 42nd Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2749469.2750399","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2749469.2750399","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:00:40Z","timestamp":1750215640000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2749469.2750399"}},"subtitle":["enabling flexible data compression with assist warps"],"short-title":[],"issued":{"date-parts":[[2015,6,13]]},"references-count":84,"alternative-id":["10.1145\/2749469.2750399","10.1145\/2749469"],"URL":"https:\/\/doi.org\/10.1145\/2749469.2750399","relation":{"is-identical-to":[{"id-type":"doi","id":"10.1145\/2872887.2750399","asserted-by":"object"}]},"subject":[],"published":{"date-parts":[[2015,6,13]]},"assertion":[{"value":"2015-06-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}