{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:34:51Z","timestamp":1763458491724,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":92,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,9,11]],"date-time":"2017-09-11T00:00:00Z","timestamp":1505088000000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["1205618"],"award-info":[{"award-number":["1205618"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,9,11]]},"DOI":"10.1145\/2967938.2967941","type":"proceedings-article","created":{"date-parts":[[2016,8,31]],"date-time":"2016-08-31T08:32:08Z","timestamp":1472632328000},"page":"17-30","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":25,"title":["\u03bcC-States"],"prefix":"10.1145","author":[{"given":"Onur","family":"Kayiran","sequence":"first","affiliation":[{"name":"Advanced Micro Devices, Inc., Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adwait","family":"Jog","sequence":"additional","affiliation":[{"name":"College of William &amp; Mary, Williamsburg, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ashutosh","family":"Pattnaik","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rachata","family":"Ausavarungnirun","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xulong","family":"Tang","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mahmut T.","family":"Kandemir","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gabriel H.","family":"Loh","sequence":"additional","affiliation":[{"name":"Advanced Micro Devices, Inc., Bellevue, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Onur","family":"Mutlu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chita R.","family":"Das","sequence":"additional","affiliation":[{"name":"The Pennsylvania State University, University Park, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2016,9,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522337"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540719"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2003.1250883"},{"key":"e_1_3_2_1_4_1","volume-title":"Power-gating in a High-Performance GPU","author":"AMD","year":"2009","unstructured":"AMD, \"AMD: Power-gating in a High-Performance GPU,\" 2009. Available: http:\/\/www.powerforward.org\/media\/p\/125.aspxSTDinterwordspacing"},{"key":"e_1_3_2_1_5_1","unstructured":"AMD \"AMD Graphics Cores Next (GCN) Architecture \" 2012. Available: https:\/\/www.amd.com\/Documents\/GCN_Architecture_whitepaper.pdfSTDinterwordspacing"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/2337159.2337207"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.38"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/379240.379265"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"A. Bakhoda phet al. \"Analyzing CUDA Workloads Using a Detailed GPU Simulator \" in ISPASS 2009.","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/360128.360153"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2012.6402918"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/859618.859636"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/SBAC-PAD.2012.44"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.16"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522311"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669150"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1816038.1815976"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485966"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1736020.1736058"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155663"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2008.44"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2013.9"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/384285.379266"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.12"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000093"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540716"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522310"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454152"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815998"},{"key":"e_1_3_2_1_32_1","unstructured":"M. Houston \"Anatomy of AMD's TeraScale Graphics Engine \" 2008. Available: http:\/\/s08.idav.ucdavis.edu\/houston-amd-terascale.pdf"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","unstructured":"Z. Hu phet al. \"Microarchitectural Techniques for Power Gating of Execution Units \" in phISLPED 2004. 10.1145\/1013235.1013249","DOI":"10.1145\/1013235.1013249"},{"key":"e_1_3_2_1_34_1","unstructured":"Intel \"Energy-Efficient Platforms - Considerations for Application Software and Services \" 2011. Available: http:\/\/www.intel.com\/content\/dam\/doc\/white-paper\/energy-efficient-platforms-2011-white-paper.pdf"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/1250662.1250686"},{"key":"e_1_3_2_1_36_1","volume-title":"Measurement, Simulation and Modeling,\" New York: John Willey","author":"Jain R.","year":"1991","unstructured":"R. Jain, \"The Art of Computer System Performance Analysis: Techniques for Experimental Design, Measurement, Simulation and Modeling,\" New York: John Willey, 1991."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485952"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2150976.2151001"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485936"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485951"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451158"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2896377.2901468"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.5555\/2523721.2523745"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.62"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.36"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.10"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"H. Kim phet al. \"Bounding Memory Interference Delay in COTS-based Multi-Core Systems \" in RTAS 2014.","DOI":"10.1109\/RTAS.2014.6925998"},{"key":"e_1_3_2_1_48_1","volume-title":"A Scalable and High-performance Scheduling Algorithm for Multiple Memory Controllers,\" in HPCA","author":"Y. Kim","year":"2010","unstructured":"Y. Kim phet al., \"ATLAS: A Scalable and High-performance Scheduling Algorithm for Multiple Memory Controllers,\" in HPCA, 2010."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.51"},{"key":"e_1_3_2_1_50_1","volume-title":"Spare Register Aware Prefetching for Graph Algorithms on GPUs,\" in HPCA","author":"Lakshminarayana N.","year":"2014","unstructured":"N. Lakshminarayana and H. Kim, \"Spare Register Aware Prefetching for Graph Algorithms on GPUs,\" in HPCA, 2014."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669155"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.17"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628107"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485964"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","unstructured":"H. Li phet al. \"Deterministic Clock Gating for Microprocessor Power Reduction \" in HPCA 2003.","DOI":"10.5555\/822080.822803"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.9.3.383"},{"key":"e_1_3_2_1_57_1","volume-title":"A GPGPU Power Simulator,\" in ISPASS","author":"J. Lucas","year":"2013","unstructured":"J. Lucas phet al., \"How a Single Chip Causes Massive Power Bills GPUSimPow: A GPGPU Power Simulator,\" in ISPASS, 2013."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/279358.279377"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815992"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/2463209.2488779"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000111"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.5555\/1362903.1362921"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/1400751.1400799"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155664"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.40"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2008.7"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"e_1_3_2_1_68_1","volume-title":"NVIDIA's Next Generation CUDA Compute Architecture","author":"Fermi NVIDIA","year":"2011","unstructured":"NVIDIA, \"Fermi: NVIDIA's Next Generation CUDA Compute Architecture,\" 2011."},{"key":"e_1_3_2_1_69_1","volume-title":"Kepler GK110","author":"A's Next NVIDIA","year":"2012","unstructured":"NVIDIA, \"NVIDIA's Next Generation CUDA Compute Architecture: Kepler GK110,\" 2012."},{"key":"e_1_3_2_1_70_1","unstructured":"NVIDIA \"Tegra 4 Family GPU Architecture \" 2013. Available: http:\/\/www.nvidia.com\/docs\/IO\/116757\/Tegra_4_GPU_Whitepaper_FINALv2.pdf"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/2342356.2342436"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.17"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2006.5"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/339647.339685"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/339647.339668"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540718"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"crossref","unstructured":"K. Roy phet al. \"Leakage Current Mechanisms and Leakage Reduction Techniques in Deep-submicrometer CMOS Circuits \" Proceedings of the IEEE 2003.","DOI":"10.1109\/JPROC.2002.808156"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2002.808156"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/378993.379244"},{"key":"e_1_3_2_1_81_1","unstructured":"J. A. Stratton phet al. \"Parboil: A Revised Benchmark Suite for Scientific and Commercial Throughput Computing \" University of Illinois at Urbana-Champaign Tech. Rep. IMPACT-12-01 March 2012."},{"key":"e_1_3_2_1_82_1","volume-title":"Achieving High Performance and Fairness at Low Cost,\" in ICCD","author":"L. Subramanian","year":"2014","unstructured":"L. Subramanian phet al., \"The Blacklisting Memory Scheduler: Achieving High Performance and Fairness at Low Cost,\" in ICCD, 2014."},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830803"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522356"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1816020"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/1508244.1508274"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","unstructured":"K. Van Craeynest phet al. \"Scheduling Heterogeneous Multi-cores Through Performance Impact Estimation (PIE) \" in ISCA 2012.","DOI":"10.5555\/2337159.2337184"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750399"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","unstructured":"B. Wang phet al. \"Exploring Hybrid Memory for GPU Energy Efficiency Through Software-hardware Co-design \" in PACT 2013.","DOI":"10.5555\/2523721.2523737"},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000094"},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.47"},{"key":"e_1_3_2_1_92_1","first-page":"630","article-title":"Controller for a Synchronous DRAM that Maximizes Throughput by Allowing Memory Requests and Commands to be Issued Out of Order,\" no","volume":"5","author":"Zuravleff W. K.","year":"1997","unstructured":"W. K. Zuravleff and T. Robinson, \"Controller for a Synchronous DRAM that Maximizes Throughput by Allowing Memory Requests and Commands to be Issued Out of Order,\" no. U.S. Patent Number 5,630,096, Sep. 1997.","journal-title":"U.S. Patent Number"}],"event":{"name":"PACT '16: International Conference on Parallel Architectures and Compilation","sponsor":["IFIP WG 10.3 IFIP WG 10.3","IEEE TCCA IEEE Computer Society Technical Committee on Computer Architecture","SIGARCH ACM Special Interest Group on Computer Architecture","IEEE CS TCPP IEEE Computer Society Technical Committee on Parallel Processing"],"location":"Haifa Israel","acronym":"PACT '16"},"container-title":["Proceedings of the 2016 International Conference on Parallel Architectures and Compilation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2967938.2967941","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2967938.2967941","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2967938.2967941","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:27:38Z","timestamp":1763458058000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2967938.2967941"}},"subtitle":["Fine-grained GPU Datapath Power Management"],"short-title":[],"issued":{"date-parts":[[2016,9,11]]},"references-count":92,"alternative-id":["10.1145\/2967938.2967941","10.1145\/2967938"],"URL":"https:\/\/doi.org\/10.1145\/2967938.2967941","relation":{},"subject":[],"published":{"date-parts":[[2016,9,11]]},"assertion":[{"value":"2016-09-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}