{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:07:31Z","timestamp":1750306051314,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,6,18]],"date-time":"2017-06-18T00:00:00Z","timestamp":1497744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,6,18]]},"DOI":"10.1145\/3061639.3062304","type":"proceedings-article","created":{"date-parts":[[2017,6,13]],"date-time":"2017-06-13T12:18:42Z","timestamp":1497356322000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Power-aware Performance Tuning of GPU Applications Through Microbenchmarking"],"prefix":"10.1145","author":[{"given":"Nicola","family":"Bombieri","sequence":"first","affiliation":[{"name":"University of Verona"}]},{"given":"Federico","family":"Busato","sequence":"additional","affiliation":[{"name":"University of Verona"}]},{"given":"Franco","family":"Fummi","sequence":"additional","affiliation":[{"name":"University of Verona"}]}],"member":"320","published-online":{"date-parts":[[2017,6,18]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"NVIDIA Tegra X1. http:\/\/www.nvidia.com\/object\/tegra.html.  NVIDIA Tegra X1. http:\/\/www.nvidia.com\/object\/tegra.html."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/SECON.2010.5453824"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/2971808.2972104"},{"key":"e_1_3_2_1_4_1","volume-title":"Professional Cuda C Programming","author":"Cheng J.","year":"2014","unstructured":"J. Cheng , M. Grossman , and T. McKercher . Professional Cuda C Programming . John Wiley & Sons , 2014 . J. Cheng, M. Grossman, and T. McKercher. Professional Cuda C Programming. John Wiley & Sons, 2014."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2687356"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2007.445"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815998"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2013.6657064"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/PDP.2016.120"},{"key":"e_1_3_2_1_10_1","volume-title":"Faster Parallel Reductions on Kepler","author":"Luitjens J.","year":"2014","unstructured":"J. Luitjens . Faster Parallel Reductions on Kepler , 2014 . https:\/\/devblogs.nvidia.com\/parallelforall\/faster-parallel-reductions-kepler\/. J. Luitjens. Faster Parallel Reductions on Kepler, 2014. https:\/\/devblogs.nvidia.com\/parallelforall\/faster-parallel-reductions-kepler\/."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2012.31"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"X. Mei K. Zhao C. Liu and X. Chu. Benchmarking the memory hierarchy of modern GPUs. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) 8707 LNCS:144--156 2014.  X. Mei K. Zhao C. Liu and X. Chu. Benchmarking the memory hierarchy of modern GPUs. Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics) 8707 LNCS:144--156 2014.","DOI":"10.1007\/978-3-662-44917-2_13"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2636342"},{"key":"e_1_3_2_1_14_1","volume-title":"E97-D(6):1506--1515","author":"Nishikawa N.","year":"2014","unstructured":"N. Nishikawa , K. Iwai , H. Tanaka , and T. Kurokawa . Throughput and power efficiency evaluation of block ciphers on kepler and GCN GPUs using micro-benchmark analysis. IEICE Transactions on Information and Systems , E97-D(6):1506--1515 , 2014 . N. Nishikawa, K. Iwai, H. Tanaka, and T. Kurokawa. Throughput and power efficiency evaluation of block ciphers on kepler and GCN GPUs using micro-benchmark analysis. IEICE Transactions on Information and Systems, E97-D(6):1506--1515, 2014."},{"key":"e_1_3_2_1_15_1","volume-title":"PTX: Parallel Thread Execution ISA","author":"NVIDIA.","year":"2015","unstructured":"NVIDIA. PTX: Parallel Thread Execution ISA , 2015 . http:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/. NVIDIA. PTX: Parallel Thread Execution ISA, 2015. http:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/."},{"key":"e_1_3_2_1_16_1","volume-title":"Programming guide","author":"CUDA.","year":"2015","unstructured":"Nvidia CUDA. Programming guide , 2015 . http:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide. Nvidia CUDA. Programming guide, 2015. http:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide."},{"key":"e_1_3_2_1_17_1","first-page":"28","article-title":"Optimizing matrix transpose in cuda","author":"Ruetsch G.","year":"2009","unstructured":"G. Ruetsch and P. Micikevicius . Optimizing matrix transpose in cuda . Nvidia CUDA SDK Application Note , 28 , 2009 . G. Ruetsch and P. Micikevicius. Optimizing matrix transpose in cuda. Nvidia CUDA SDK Application Note, 28, 2009.","journal-title":"Nvidia CUDA SDK Application Note"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2145816.2145819"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/2033408.2033459"},{"key":"e_1_3_2_1_20_1","first-page":"300","volume-title":"Proc. of ACM\/IEEE DATE","author":"Wang Y.","year":"2012","unstructured":"Y. Wang , S. Roy , and N. Ranganathan . Run-time power-gating in caches of gpus for leakage energy savings . In Proc. of ACM\/IEEE DATE , pages 300 -- 303 , 2012 . Y. Wang, S. Roy, and N. Ranganathan. Run-time power-gating in caches of gpus for leakage energy savings. In Proc. of ACM\/IEEE DATE, pages 300--303, 2012."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2010.5452013"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-014-1112-2"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2012.30"}],"event":{"name":"DAC '17: The 54th Annual Design Automation Conference 2017","sponsor":["EDAC Electronic Design Automation Consortium","SIGDA ACM Special Interest Group on Design Automation","IEEE-CEDA","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"Austin TX USA","acronym":"DAC '17"},"container-title":["Proceedings of the 54th Annual Design Automation Conference 2017"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3061639.3062304","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3061639.3062304","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:03:26Z","timestamp":1750215806000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3061639.3062304"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,6,18]]},"references-count":23,"alternative-id":["10.1145\/3061639.3062304","10.1145\/3061639"],"URL":"https:\/\/doi.org\/10.1145\/3061639.3062304","relation":{},"subject":[],"published":{"date-parts":[[2017,6,18]]},"assertion":[{"value":"2017-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}