{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:52:05Z","timestamp":1771951925078,"version":"3.50.1"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,2,27]],"date-time":"2021-02-27T00:00:00Z","timestamp":1614384000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,2,27]],"date-time":"2021-02-27T00:00:00Z","timestamp":1614384000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,2,27]],"date-time":"2021-02-27T00:00:00Z","timestamp":1614384000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,2,27]]},"DOI":"10.1109\/cgo51591.2021.9370339","type":"proceedings-article","created":{"date-parts":[[2021,3,11]],"date-time":"2021-03-11T21:33:26Z","timestamp":1615498406000},"page":"115-125","source":"Crossref","is-referenced-by-count":15,"title":["GPA: A GPU Performance Advisor Based on Instruction Sampling"],"prefix":"10.1109","author":[{"given":"Keren","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Xiaozhu","family":"Meng","sequence":"additional","affiliation":[]},{"given":"Ryuichi","family":"Sai","sequence":"additional","affiliation":[]},{"given":"John","family":"Mellor-Crummey","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2005.28"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2014.6844459"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358307"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/2872887.2750375"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2015.14"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126961"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2014.7116904"},{"key":"ref36","author":"sopeju","year":"2011","journal-title":"AutoScope Automatic suggestions for code optimizations using PerfExpert"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.41"},{"key":"ref34","year":"2020","journal-title":"NVIDIA Compute Sanitizer DA-05679&#x2013;001_v11 2"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/PMBS49563.2019.00014"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2854038.2854044"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1997.645821"},{"key":"ref12","article-title":"Instruction-based sampling: A new performance analysis technique for AMD family 10h processors","author":"drongowski","year":"2007","journal-title":"Advanced Micro Devices"},{"key":"ref13","first-page":"11","article-title":"Intel&#x00AE; 64 and IA-32 architectures software developer's manual","volume":"2","year":"2011","journal-title":"Volume 3b System Programming Guide"},{"key":"ref14","year":"0","journal-title":"NVIDIA Corporation"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ProTools49597.2019.00006"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"ref17","year":"2020","journal-title":"CUPTI User's Guide DA-05679&#x2013;001_v11 2"},{"key":"ref18","author":"jia","year":"2018","journal-title":"Dissecting the NVIDIA Volta GPU Architecture via Microbenchmarking"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018755"},{"key":"ref28","year":"0","journal-title":"Advanced Micro Devices Inc"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392752"},{"key":"ref27","author":"reinders","year":"2005","journal-title":"VTune Performance Analyzer Essentials"},{"key":"ref3","year":"0","journal-title":"NVIDIA Nsight Compute"},{"key":"ref6","first-page":"85","article-title":"Score- P: A unified performance measurement system for petascale applications","author":"mey","year":"2012","journal-title":"Competence in High Performance Computing 2010"},{"key":"ref29","author":"zhang","year":"2018","journal-title":"Data-centric performance measurement and mapping for highly parallel programming models"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1177\/1094342006064482"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3168831"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-16012-2_2"},{"key":"ref2","year":"0","journal-title":"(2020) NVIDIA Nsight Systems"},{"key":"ref9","doi-asserted-by":"crossref","DOI":"10.1109\/SC41405.2020.00093","article-title":"GVProf: A value profiler for GPU-based clusters","author":"zhou","year":"2020","journal-title":"Proceedings of the Conference on High Performance Computing Networking Storage and Analysis Ser SC '09"},{"key":"ref1","year":"2020","journal-title":"Profiler User's Guide DU-05982&#x2013;001_vll 2"},{"key":"ref20","year":"0","journal-title":"U of Wisconsin-Madison Dyninst"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3022671.2984003"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICSM.1997.624245"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2007.32"},{"key":"ref24","author":"lyakh","year":"0","journal-title":"ExaTENSOR"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661201"},{"key":"ref23","year":"0","journal-title":"Lawrence Livermore National Laboratory"},{"key":"ref26","author":"meng","year":"2020","journal-title":"Minimod A finite difference solver for seismic modeling"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2019.00014"},{"key":"ref25","year":"0","journal-title":"National Renewable Energy Laboratory"}],"event":{"name":"2021 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)","location":"Seoul, Korea (South)","start":{"date-parts":[[2021,2,27]]},"end":{"date-parts":[[2021,3,3]]}},"container-title":["2021 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9370300\/9370301\/09370339.pdf?arnumber=9370339","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,21]],"date-time":"2022-12-21T06:10:15Z","timestamp":1671603015000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9370339\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,27]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/cgo51591.2021.9370339","relation":{},"subject":[],"published":{"date-parts":[[2021,2,27]]}}}