{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T11:12:50Z","timestamp":1773918770264,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2013,12,7]],"date-time":"2013-12-07T00:00:00Z","timestamp":1386374400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000143","name":"Division of Computing and Communication Foundations","doi-asserted-by":"publisher","award":["CCF-095360, CCF-1016262, CNS-1217102"],"award-info":[{"award-number":["CCF-095360, CCF-1016262, CNS-1217102"]}],"id":[{"id":"10.13039\/100000143","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004311","name":"Advanced Micro Devices","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004311","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000144","name":"Division of Computer and Network Systems","doi-asserted-by":"publisher","award":["CCF-095360, CCF-1016262, CNS-1217102"],"award-info":[{"award-number":["CCF-095360, CCF-1016262, CNS-1217102"]}],"id":[{"id":"10.13039\/100000144","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000028","name":"Semiconductor Research Corporation","doi-asserted-by":"publisher","award":["2080.001"],"award-info":[{"award-number":["2080.001"]}],"id":[{"id":"10.13039\/100000028","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2013,12,7]]},"DOI":"10.1145\/2540708.2540716","type":"proceedings-article","created":{"date-parts":[[2013,12,17]],"date-time":"2013-12-17T13:36:21Z","timestamp":1387287381000},"page":"74-85","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Exploiting GPU peak-power and performance tradeoffs through reduced effective pipeline latency"],"prefix":"10.1145","author":[{"given":"Syed Zohaib","family":"Gilani","sequence":"first","affiliation":[{"name":"The University of Wisconsin--Madison"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nam Sung","family":"Kim","sequence":"additional","affiliation":[{"name":"The University of Wisconsin--Madison"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael J.","family":"Schulte","sequence":"additional","affiliation":[{"name":"AMD Research, Advanced Micro Devices, Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2013,12,7]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"834","volume":"7","author":"Liu S.","year":"2010","unstructured":"S. Liu , E. Lindholm , M. Siu , B. Coon and S. Oberman , \"Operand collector architecture\". US Patent 7 , 834 ,881 B2, 16 November 2010 . S. Liu, E. Lindholm, M. Siu, B. Coon and S. Oberman, \"Operand collector architecture\". US Patent 7,834,881 B2, 16 November 2010.","journal-title":"\"Operand collector architecture\". US Patent"},{"key":"e_1_3_2_1_2_1","volume-title":"Apparatus, system, and method for coalescing parallel memory requests\". US Patent 7492368","author":"Nordquist B.","year":"2009","unstructured":"B. Nordquist and S. Lew , \" Apparatus, system, and method for coalescing parallel memory requests\". US Patent 7492368 , 17 February 2009 . B. Nordquist and S. Lew, \"Apparatus, system, and method for coalescing parallel memory requests\". US Patent 7492368, 17 February 2009."},{"key":"e_1_3_2_1_3_1","unstructured":"NVIDIA \"CUDA C best practices guide \" 2011.  NVIDIA \"CUDA C best practices guide \" 2011."},{"key":"e_1_3_2_1_4_1","volume-title":"Fermi","author":"CUDA","year":"2009","unstructured":"NVIDIA, \"NVIDIA's next generation CUDA compute architecture : Fermi ,\" 2009 . NVIDIA, \"NVIDIA's next generation CUDA compute architecture: Fermi,\" 2009."},{"key":"e_1_3_2_1_5_1","unstructured":"Nvidia {Online}. Available: http:\/\/www.geforce.com\/Active\/en_US\/en_US\/pdf\/GeForce-GTX-680-Whitepaper-FINAL.pdf.  Nvidia {Online}. Available: http:\/\/www.geforce.com\/Active\/en_US\/en_US\/pdf\/GeForce-GTX-680-Whitepaper-FINAL.pdf."},{"key":"e_1_3_2_1_6_1","unstructured":"Microway \"GPGPU architecture comparison of NVIDIA and ATI GPUs \" June 2010. {Online}. Available: http:\/\/www.microway.com\/pdfs\/GPGPU_Architecture_and_Performance_Comparison.pdf.  Microway \"GPGPU architecture comparison of NVIDIA and ATI GPUs \" June 2010. {Online}. Available: http:\/\/www.microway.com\/pdfs\/GPGPU_Architecture_and_Performance_Comparison.pdf."},{"key":"e_1_3_2_1_7_1","unstructured":"Nvidia 2012. {Online}. Available: http:\/\/www.geforce.com\/hardware\/desktop-gpus\/geforce-gtx-480\/specifications.  Nvidia 2012. {Online}. Available: http:\/\/www.geforce.com\/hardware\/desktop-gpus\/geforce-gtx-480\/specifications."},{"key":"e_1_3_2_1_8_1","volume-title":"Some limits of power delivery in the multicore era,\" in Workshop on Energy efficient Design","author":"Zhang R.","year":"2012","unstructured":"R. Zhang , B. Meyer , W. Huang , K. Skadron and M. Stan , \" Some limits of power delivery in the multicore era,\" in Workshop on Energy efficient Design , 2012 . R. Zhang, B. Meyer, W. Huang, K. Skadron and M. Stan, \"Some limits of power delivery in the multicore era,\" in Workshop on Energy efficient Design, 2012."},{"key":"e_1_3_2_1_9_1","unstructured":"Intel Intel\u00ae Core#8482; i7-900 desktop processor extreme edition series and Intel\u00ae Core#8482; i7-900 desktop processor series 2010.  Intel Intel\u00ae Core#8482; i7-900 desktop processor extreme edition series and Intel\u00ae Core#8482; i7-900 desktop processor series 2010."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000093"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2010.85"},{"key":"e_1_3_2_1_13_1","volume-title":"Analyzing CUDA workloads using a detailed GPU simulator,\" in IEEE International Symposium on Performance Analysis of Systems and Software","author":"Bakhoda A.","year":"2009","unstructured":"A. Bakhoda , G. Yuan , W. Fung , H. Wong and T. Aamodt , \" Analyzing CUDA workloads using a detailed GPU simulator,\" in IEEE International Symposium on Performance Analysis of Systems and Software , 2009 . A. Bakhoda, G. Yuan, W. Fung, H. Wong and T. Aamodt, \"Analyzing CUDA workloads using a detailed GPU simulator,\" in IEEE International Symposium on Performance Analysis of Systems and Software, 2009."},{"key":"e_1_3_2_1_14_1","unstructured":"Nvidia\u00ae \"CUDA warps and occupancy \" {Online}. Available: http:\/\/developer.download.nvidia.com\/CUDA\/training\/cuda_webinars_WarpsAndOccupancy.pdf.  Nvidia\u00ae \"CUDA warps and occupancy \" {Online}. Available: http:\/\/developer.download.nvidia.com\/CUDA\/training\/cuda_webinars_WarpsAndOccupancy.pdf."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485964"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815998"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ARITH.2009.17"},{"key":"e_1_3_2_1_18_1","volume-title":"Morgan Kaufmann","author":"Ercegovac M.","year":"2004","unstructured":"M. Ercegovac and T. Lang , Digital arithmetic , Morgan Kaufmann , 2004 . M. Ercegovac and T. Lang, Digital arithmetic, Morgan Kaufmann, 2004."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2011.6043260"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/775832.776032"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/313817.313881"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/774572.774601"},{"key":"e_1_3_2_1_23_1","volume-title":"A fully-integrated switched-capacitor 2:1 voltage converter with regulation capability and 90% efficiency at 2.3A\/mm2,\" in IEEE Symposium on VLSI Circuits (VLSIC)","author":"Chang L.","year":"2010","unstructured":"L. Chang , R. Montoye , B. Ji , A. Weger , K. Stawiasz and R. Dennard , \" A fully-integrated switched-capacitor 2:1 voltage converter with regulation capability and 90% efficiency at 2.3A\/mm2,\" in IEEE Symposium on VLSI Circuits (VLSIC) , 2010 . L. Chang, R. Montoye, B. Ji, A. Weger, K. Stawiasz and R. Dennard, \"A fully-integrated switched-capacitor 2:1 voltage converter with regulation capability and 90% efficiency at 2.3A\/mm2,\" in IEEE Symposium on VLSI Circuits (VLSIC), 2010."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669172"},{"key":"e_1_3_2_1_25_1","unstructured":"HP {Online}. Available: http:\/\/quid.hp1.hp.com:9081\/cacti.  HP {Online}. Available: http:\/\/quid.hp1.hp.com:9081\/cacti."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2010.121"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155675"},{"key":"e_1_3_2_1_28_1","volume-title":"Demystifying GPU microarchitecture through microbenchmarking,\" in IEEE International Symposium on Performance Analysis of Systems & Software (ISPASS)","author":"Wong H.","year":"2010","unstructured":"H. Wong , M. Papadopoulou , M. Sadooghi-Alvandi and A. Moshovos , \" Demystifying GPU microarchitecture through microbenchmarking,\" in IEEE International Symposium on Performance Analysis of Systems & Software (ISPASS) , 2010 . H. Wong, M. Papadopoulou, M. Sadooghi-Alvandi and A. Moshovos, \"Demystifying GPU microarchitecture through microbenchmarking,\" in IEEE International Symposium on Performance Analysis of Systems & Software (ISPASS), 2010."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"e_1_3_2_1_30_1","volume-title":"Optimizing pipelines for power and performance,\" in IEEE\/ACM International Symposium on Microarchitecture","author":"Srinivasan V.","year":"2002","unstructured":"V. Srinivasan , D. Brooks , M. Gschwind , P. Bose , V. Zyuban , P. Strenski and P. Emma , \" Optimizing pipelines for power and performance,\" in IEEE\/ACM International Symposium on Microarchitecture , 2002 . V. Srinivasan, D. Brooks, M. Gschwind, P. Bose, V. Zyuban, P. Strenski and P. Emma, \"Optimizing pipelines for power and performance,\" in IEEE\/ACM International Symposium on Microarchitecture, 2002."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2012.6169032"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/12.841125"}],"event":{"name":"MICRO-46: The 46th Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Davis California","acronym":"MICRO-46","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing","IEEE CS"]},"container-title":["Proceedings of the 46th Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2540708.2540716","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2540708.2540716","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T08:10:33Z","timestamp":1750234233000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2540708.2540716"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,12,7]]},"references-count":32,"alternative-id":["10.1145\/2540708.2540716","10.1145\/2540708"],"URL":"https:\/\/doi.org\/10.1145\/2540708.2540716","relation":{},"subject":[],"published":{"date-parts":[[2013,12,7]]},"assertion":[{"value":"2013-12-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}