{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:58:03Z","timestamp":1768031883136,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T00:00:00Z","timestamp":1699747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"DEEP-SEA","award":["955606"],"award-info":[{"award-number":["955606"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,12]]},"DOI":"10.1145\/3624062.3624208","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T13:53:39Z","timestamp":1699624419000},"page":"1392-1402","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["GPUscout: Locating Data Movement-related Bottlenecks on GPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-0337-9964","authenticated-orcid":false,"given":"Soumya","family":"Sen","sequence":"first","affiliation":[{"name":"Technical University Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4120-9472","authenticated-orcid":false,"given":"Stepan","family":"Vanecek","sequence":"additional","affiliation":[{"name":"Technical University Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9013-435X","authenticated-orcid":false,"given":"Martin","family":"Schulz","sequence":"additional","affiliation":[{"name":"Technical University Munich, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,11,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCSIM.2009.5192847"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","unstructured":"Lorenz Braun and Holger Fr\u00f6ning. 2019. CUDA Flux: A Lightweight Instruction Profiler for CUDA Applications. In 2019 IEEE\/ACM Performance Modeling Benchmarking and Simulation of High Performance Computer Systems (PMBS). 73\u201381. https:\/\/doi.org\/10.1109\/PMBS49563.2019.00014","DOI":"10.1109\/PMBS49563.2019.00014"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1177\/109434200001400303"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-28151-8_17"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2012.04.209"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/1753228.1753234"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3578244.3583736"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31476-6_7"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2013.95"},{"key":"e_1_3_2_2_10_1","unstructured":"Elias Konstantinidis. 2015. mixbench. https:\/\/github.com\/ekondis\/mixbench . commit: 8a3585e3cf32a062192396cbc560afe6abb566d0."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2017.04.002"},{"key":"e_1_3_2_2_12_1","volume-title":"Identifying Optimization Opportunities Within Kernel Execution in GPU Codes. In Euro-Par Workshops.","author":"Lim V.","year":"2015","unstructured":"Robert\u00a0V. Lim, Allen\u00a0D. Malony, Boyana Norris, and Nicholas Chaimov. 2015. Identifying Optimization Opportunities Within Kernel Execution in GPU Codes. In Euro-Par Workshops."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1810085.1810105"},{"key":"e_1_3_2_2_14_1","volume-title":"International Conference on Parallel Computing.","author":"Mayanglambam Shangkar","year":"2009","unstructured":"Shangkar Mayanglambam, Allen\u00a0D. Malony, and Matthew\u00a0J. Sottile. 2009. Performance Measurement of Applications with GPU Acceleration using CUDA. In International Conference on Parallel Computing."},{"key":"e_1_3_2_2_15_1","volume-title":"Supercomputer 63","author":"Nagel E.","year":"1996","unstructured":"Wolfgang\u00a0E. Nagel, Alfred Arnold, Michael Weber, Hans-Christian Hoppe, and Karl Solchenbach. 1996. VAMPIR: Visualization and Analysis of MPI Resources. Supercomputer 63, Vol. XII, 1 (1996), 69\u201380. https:\/\/juser.fz-juelich.de\/record\/189233"},{"key":"e_1_3_2_2_16_1","unstructured":"NVIDIA. 2020. CUDA release: 10.2.89. https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/. Accessed: 2023-04-15."},{"key":"e_1_3_2_2_17_1","unstructured":"NVIDIA. 2022. CUDA Profiling Tools Interface (CUPTI) release: 11.8.0. https:\/\/docs.nvidia.com\/cuda\/cupti\/index.html. Accessed: 2023-04-15."},{"key":"e_1_3_2_2_18_1","unstructured":"NVIDIA. 2023. CUDA Binary Utilities release: 12.0. https:\/\/docs.nvidia.com\/cuda\/cuda-binary-utilities\/. Accessed: 2023-04-15."},{"key":"e_1_3_2_2_19_1","unstructured":"NVIDIA. 2023. CUDA Profiler release: 12.1. https:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_Profiler_Users_Guide.pdf. Accessed: 2023-04-15."},{"key":"e_1_3_2_2_20_1","volume-title":"Kernel Profiling Guide, release","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. 2023. Kernel Profiling Guide, release: 2022.4.1. https:\/\/docs.nvidia.com\/nsight-compute\/ProfilingGuide\/index.html. Accessed: 2023-04-15."},{"key":"e_1_3_2_2_21_1","volume-title":"Nsight Compute CLI, release","author":"NVIDIA.","year":"2022","unstructured":"NVIDIA. 2023. Nsight Compute CLI, release: 2022.4.1. https:\/\/docs.nvidia.com\/nsight-compute\/NsightComputeCli\/index.html. Accessed: 2023-04-15."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.19026\/rjaset.6.3452"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342006064482"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.3591"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2021.102837"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO51591.2021.9370339"}],"event":{"name":"SC-W 2023: Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis","location":"Denver CO USA","acronym":"SC-W 2023"},"container-title":["Proceedings of the SC '23 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624208","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624062.3624208","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T03:05:24Z","timestamp":1755745524000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624062.3624208"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,12]]},"references-count":26,"alternative-id":["10.1145\/3624062.3624208","10.1145\/3624062"],"URL":"https:\/\/doi.org\/10.1145\/3624062.3624208","relation":{},"subject":[],"published":{"date-parts":[[2023,11,12]]},"assertion":[{"value":"2023-11-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}