{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:26:10Z","timestamp":1750220770648,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,6,29]],"date-time":"2020-06-29T00:00:00Z","timestamp":1593388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-AC05-00OR22725"],"award-info":[{"award-number":["DE-AC05-00OR22725"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006228","name":"Oak Ridge National Laboratory","doi-asserted-by":"publisher","award":["4000151982"],"award-info":[{"award-number":["4000151982"]}],"id":[{"id":"10.13039\/100006228","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006227","name":"Lawrence Livermore National Laboratory","doi-asserted-by":"publisher","award":["B617863"],"award-info":[{"award-number":["B617863"]}],"id":[{"id":"10.13039\/100006227","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100014718","name":"National Science Foundation","doi-asserted-by":"publisher","award":["ACI-1449918"],"award-info":[{"award-number":["ACI-1449918"]}],"id":[{"id":"10.13039\/100014718","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,6,29]]},"DOI":"10.1145\/3392717.3392759","type":"proceedings-article","created":{"date-parts":[[2020,6,29]],"date-time":"2020-06-29T18:49:02Z","timestamp":1593456542000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Identifying and (automatically) remedying performance problems in CPU\/GPU applications"],"prefix":"10.1145","author":[{"given":"Benjamin","family":"Welton","sequence":"first","affiliation":[{"name":"University of Wisconsin - Madison"}]},{"given":"Barton P.","family":"Miller","sequence":"additional","affiliation":[{"name":"University of Wisconsin - Madison"}]}],"member":"320","published-online":{"date-parts":[[2020,6,29]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/98457.98518"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628092"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2011.5764677"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/258915.258928"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503278"},{"volume-title":"EuroPar Conference on Parallel Processing (EuroPar '03)","author":"Bell R.","key":"e_1_3_2_1_6_1"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1002\/spe.4380220502"},{"volume-title":"2016 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO). 12--23","author":"Chen D.","key":"e_1_3_2_1_8_1"},{"volume-title":"In 2nd ACM Workshop on Feedback-Directed Optimization (FDO.","author":"Cohn Robert","key":"e_1_3_2_1_9_1"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/SBAC-PAD.2009.26"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2813885.2737969"},{"key":"e_1_3_2_1_12_1","unstructured":"Richard Galvez and Greg van Anders. 2011. Accelerating the solution of families of shifted linear systems with CUDA. (2011). arXiv:hep-lat\/1102.2143  Richard Galvez and Greg van Anders. 2011. Accelerating the solution of families of shifted linear systems with CUDA. (2011). arXiv:hep-lat\/1102.2143"},{"volume-title":"the 2005 International Conference on Parallel Computing (PARCO '05)","author":"Gerndt M.","key":"e_1_3_2_1_13_1"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.521.0137"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2827872"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.27600.97285"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2013.6494997"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31476-6_7"},{"volume-title":"Time, Clocks and the Ordering of Events in a Distributed System. Communications of the ACM 21, 7 (July","year":"1978","author":"Lamport Leslie","key":"e_1_3_2_1_19_1"},{"volume-title":"the 23rd International Conference on Parallel Computational Fluid Dynamics ((ParCFD '11)","author":"Layton S.","key":"e_1_3_2_1_20_1"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2000.10052"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2011.71"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1810085.1810105"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/70082.68200"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/377792.377826"},{"key":"e_1_3_2_1_26_1","unstructured":"Nvidia. 2018. The Cuda FFT Library (9.2 ed.).  Nvidia. 2018. The Cuda FFT Library (9.2 ed.)."},{"key":"e_1_3_2_1_27_1","unstructured":"Nvidia. 2018. The Nvidia CUDA Profiler Users' Guide (9.2 ed.).  Nvidia. 2018. The Nvidia CUDA Profiler Users' Guide (9.2 ed.)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661201"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/93542.93550"},{"volume-title":"Gotcha: An Function-Wrapping Interface for HPC Tools","year":"2019","author":"Poliakoff David","key":"e_1_3_2_1_30_1"},{"key":"e_1_3_2_1_31_1","unstructured":"Paradyn Project. [n. d.]. Dyninst: Putting the Performance in High Performance Computing. http:\/\/www.dyninst.org  Paradyn Project. [n. d.]. Dyninst: Putting the Performance in High Performance Computing. http:\/\/www.dyninst.org"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342006064482"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3225058.3225096"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5161054"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/782814.782850"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2018.00045"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2018.00045"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/1362622.1362674"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5160988"}],"event":{"name":"ICS '20: 2020 International Conference on Supercomputing","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Barcelona Spain","acronym":"ICS '20"},"container-title":["Proceedings of the 34th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3392717.3392759","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3392717.3392759","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3392717.3392759","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:41:15Z","timestamp":1750200075000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3392717.3392759"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,29]]},"references-count":39,"alternative-id":["10.1145\/3392717.3392759","10.1145\/3392717"],"URL":"https:\/\/doi.org\/10.1145\/3392717.3392759","relation":{},"subject":[],"published":{"date-parts":[[2020,6,29]]},"assertion":[{"value":"2020-06-29","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}