{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,24]],"date-time":"2026-04-24T01:21:00Z","timestamp":1776993660291,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"European Research Council","doi-asserted-by":"publisher","award":["949587"],"award-info":[{"award-number":["949587"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Royal Society-Newton Advanced Fellowship","award":["NAF\\R2\\202207"],"award-info":[{"award-number":["NAF\\R2\\202207"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3650200.3656597","type":"proceedings-article","created":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T14:11:54Z","timestamp":1717423914000},"page":"525-536","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Snoopie: A Multi-GPU Communication Profiler and Visualizer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-1920-6773","authenticated-orcid":false,"given":"Mohammad Kefah Taha","family":"Issa","sequence":"first","affiliation":[{"name":"ParCoreLab, Ko\u00e7 University, Turkiye"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6166-4252","authenticated-orcid":false,"given":"Muhammad Aditya","family":"Sasongko","sequence":"additional","affiliation":[{"name":"ParCoreLab, Ko\u00e7 University, T\u00fcrkiye"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0384-6330","authenticated-orcid":false,"given":"Ilyas","family":"Turimbetov","sequence":"additional","affiliation":[{"name":"ParCoreLab, Ko\u00e7 University, Turkiye"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7235-6418","authenticated-orcid":false,"given":"Javid","family":"Baydamirli","sequence":"additional","affiliation":[{"name":"ParCoreLab, Ko\u00e7 University, Turkiye"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9603-2466","authenticated-orcid":false,"given":"Do\u011fan","family":"Sa\u011fbili","sequence":"additional","affiliation":[{"name":"ParCoreLab, Ko\u00e7 University, Turkiye"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2351-0770","authenticated-orcid":false,"given":"Didem","family":"Unat","sequence":"additional","affiliation":[{"name":"ParCoreLab, Ko\u00e7 University, Turkiye"}]}],"member":"320","published-online":{"date-parts":[[2024,6,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/1753228.1753233"},{"key":"e_1_3_2_1_2_1","unstructured":"Inc Advanced Micro\u00a0Devices. 2024. AMD ROCm Documentation. https:\/\/rocm.docs.amd.com\/."},{"key":"e_1_3_2_1_3_1","unstructured":"Inc Advanced Micro\u00a0Devices. 2024. Omnitrace. https:\/\/github.com\/AMDResearch\/omnitrace."},{"key":"e_1_3_2_1_4_1","unstructured":"Inc Advanced Micro\u00a0Devices. 2024. ROCm RCCL Documentation. https:\/\/rocm.docs.amd.com\/projects\/rccl\/en\/latest\/."},{"key":"e_1_3_2_1_5_1","unstructured":"Inc Advanced Micro\u00a0Devices. 2024. ROCSHMEM. https:\/\/github.com\/ROCm-DeveloperTools\/ROC_SHMEM."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2017.29"},{"key":"e_1_3_2_1_7_1","unstructured":"Meta AI. 2024. PyTorch. https:\/\/github.com\/pytorch\/pytorch."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-71058-3_10"},{"key":"e_1_3_2_1_9_1","unstructured":"Continuum Analytics. 2024. A Just-In-Time Compiler for Numerical Functions in Python. https:\/\/github.com\/numba\/numba."},{"key":"e_1_3_2_1_10_1","unstructured":"Google Brain. 2024. TensorFlow. https:\/\/github.com\/tensorflow\/tensorflow."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/71.780863"},{"key":"e_1_3_2_1_12_1","unstructured":"Barcelona\u00a0Supercomputing Center. [n. d.]. BSC-Performance-Tools: Extrae. https:\/\/tools.bsc.es\/extrae."},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","author":"Chen Yuxin","year":"2022","unstructured":"Yuxin Chen, Benjamin Brock, Serban Porumbescu, Ayd\u0131n Bulu\u00e7, Katherine Yelick, and John\u00a0D. Owens. 2022. Scalable Irregular Parallelism with GPUs: Getting CPUs out of the Way. In Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis (Dallas, Texas) (SC \u201922). IEEE, New York, NY, USA, Article 50, 16\u00a0pages."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00102"},{"key":"e_1_3_2_1_15_1","volume-title":"PARAVER: A tool to visualize and analyze parallel code. WoTUG-18 44 (03","author":"Computadors Departament","year":"1995","unstructured":"Departament Computadors, Vincent Pillet, Jes\u00fas Labarta, Toni Cortes, and Sergi Girona. 1995. PARAVER: A tool to visualize and analyze parallel code. WoTUG-18 44 (03 1995)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPC.2007.39"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3554977"},{"key":"e_1_3_2_1_18_1","unstructured":"Apache\u00a0Software Foundation. 2024. Apache MXNet for Deep Learning. https:\/\/github.com\/apache\/mxnet."},{"key":"e_1_3_2_1_19_1","unstructured":"Python\u00a0Software Foundation. 2024. Python\/C API Reference Manual. https:\/\/docs.python.org\/3\/c-api\/index.html."},{"key":"e_1_3_2_1_20_1","volume-title":"Pvm 3 User\u2019s Guide And Reference Manual. (11","author":"Geist Al","year":"1995","unstructured":"Al Geist, Adam Beguelin, Jack Dongarra, Weicheng Jiang, Robert Manchek, and Vaidy Sunderam. 1995. Pvm 3 User\u2019s Guide And Reference Manual. (11 1995)."},{"key":"e_1_3_2_1_21_1","volume-title":"Effect of Short Term Scheduling on Message Passing Multiprogrammed Systems. (11","author":"Girona Sergi","year":"1994","unstructured":"Sergi Girona, Toni Cortes, and Vincent Pillet. 1994. Effect of Short Term Scheduling on Message Passing Multiprogrammed Systems. (11 1994)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/InPar.2012.6339596"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332466.3374544"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2015.21"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2005.55"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2005.29"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593713"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","unstructured":"Mohammad Kefah\u00a0Taha Issa Didem Unat Dogan Sagbili Muhammad\u00a0Aditya Sasongko Ilyas Turimbetov and Javid Baydamirli. 2024. Snoopie Figures. https:\/\/doi.org\/10.6084\/m9.figshare.c.7190766.v1","DOI":"10.6084\/m9.figshare.c.7190766.v1"},{"key":"e_1_3_2_1_29_1","unstructured":"Wenzel Jakob. 2024. pybind11 \u2014 Seamless operability between C++11 and Python. https:\/\/github.com\/pybind\/pybind11."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1137\/S1064827595287997"},{"key":"e_1_3_2_1_31_1","volume-title":"TAU, and Vampir. In Tools for High Performance Computing","author":"Kn\u00fcpfer Andreas","year":"2011","unstructured":"Andreas Kn\u00fcpfer, Christian R\u00f6ssel, Dieter\u00a0an Mey, Scott Biersdorff, Kai Diethelm, Dominic Eschweiler, Markus Geimer, Michael Gerndt, Daniel Lorenz, Allen Malony, Wolfgang\u00a0E. Nagel, Yury Oleynik, Peter Philippen, Pavel Saviankou, Dirk Schmidl, Sameer Shende, Ronny Tsch\u00fcter, Michael Wagner, Bert Wesarg, and Felix Wolf. 2012. Score-P: A Joint Performance Measurement Run-Time Infrastructure for Periscope,Scalasca, TAU, and Vampir. In Tools for High Performance Computing 2011, Holger Brunst, Matthias\u00a0S. M\u00fcller, Wolfgang\u00a0E. Nagel, and Michael\u00a0M. Resch (Eds.). Springer Berlin Heidelberg, Berlin, Heidelberg, 79\u201391."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2019.00022"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126950"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573483"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3582016.3582044"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00068"},{"key":"e_1_3_2_1_37_1","volume-title":"STREAM: Sustainable Memory Bandwidth in High Performance Computers.HPCWire https:\/\/www.hpcwire.com\/2016\/11\/07\/mccalpin-traces-hpc-system-balance-trends.","author":"McCalpin John","year":"2016","unstructured":"John McCalpin. 2016. STREAM: Sustainable Memory Bandwidth in High Performance Computers.HPCWire https:\/\/www.hpcwire.com\/2016\/11\/07\/mccalpin-traces-hpc-system-balance-trends."},{"key":"e_1_3_2_1_38_1","volume-title":"VAMPIR: Visualization and analysis of mpi resources. Supercomputer 12 (05","author":"Nagel Wolfgang","year":"1996","unstructured":"Wolfgang Nagel, Alfred Arnold, Michael Weber, Hans-Christian Hoppe, and Karl Solchenbach. 1996. VAMPIR: Visualization and analysis of mpi resources. Supercomputer 12 (05 1996)."},{"key":"e_1_3_2_1_39_1","unstructured":"Greg Nakhimovsky. 2001. Debugging and Performance Tuning with Library Interposers. http:\/\/dsc.sun.com\/solaris\/articles\/lib_interposers.html."},{"key":"e_1_3_2_1_40_1","unstructured":"NVIDIA. 2022. Compute Sanitizer. https:\/\/docs.nvidia.com\/cuda\/compute-sanitizer\/"},{"key":"e_1_3_2_1_41_1","unstructured":"NVIDIA. 2022. Multi GPU Programming Models NVSHMEM. https:\/\/github.com\/NVIDIA\/multi-gpu-programming-models\/tree\/master\/nvshmem_opt."},{"key":"e_1_3_2_1_42_1","unstructured":"NVIDIA. 2022. Multi GPU Programming Models P2P. https:\/\/github.com\/NVIDIA\/multi-gpu-programming-models\/tree\/master\/multi_threaded_p2p."},{"key":"e_1_3_2_1_43_1","unstructured":"NVIDIA. 2022. NCCL Tests. https:\/\/github.com\/NVIDIA\/nccl-tests."},{"key":"e_1_3_2_1_44_1","unstructured":"NVIDIA. 2022. Nvidia OpenSHMEM Library (NVSHMEM) documentation. https:\/\/docs.nvidia.com\/nvshmem\/api\/"},{"key":"e_1_3_2_1_45_1","unstructured":"NVIDIA. 2023. CUDA Binary Utilities. https:\/\/docs.nvidia.com\/cuda\/cuda-binary-utilities\/index.html."},{"key":"e_1_3_2_1_46_1","unstructured":"NVIDIA. 2023. CUPTI. https:\/\/docs.nvidia.com\/cupti\/"},{"key":"e_1_3_2_1_47_1","unstructured":"NVIDIA. 2023. NVIDIA CUDA Profiling Tools Interface (CUPTI) - CUDA Toolkit. https:\/\/developer.nvidia.com\/cupti"},{"key":"e_1_3_2_1_48_1","unstructured":"NVIDIA. 2023. NVIDIA Nsight Systems. https:\/\/developer.nvidia.com\/nsight-systems."},{"key":"e_1_3_2_1_49_1","unstructured":"NVIDIA. 2023. NVTAGS. https:\/\/developer.nvidia.com\/nvidia-nvtags."},{"key":"e_1_3_2_1_50_1","unstructured":"NVIDIA. 2023. NVTX. https:\/\/docs.nvidia.com\/nvtx\/."},{"key":"e_1_3_2_1_51_1","unstructured":"NVIDIA. 2023. Parallel Thread Execution ISA Version 8.2. https:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/"},{"key":"e_1_3_2_1_52_1","unstructured":"NVIDIA. 2023. Parallel Thread Execution ISA Version 8.2. https:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/"},{"key":"e_1_3_2_1_53_1","unstructured":"Nvidia. 2024. NCCL. https:\/\/github.com\/NVIDIA\/nccl."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2013.6702638"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126914"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297663.3310299"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-09766-4_490"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2017.00037"},{"key":"e_1_3_2_1_59_1","unstructured":"Microsoft Research. 2024. deepspeed. https:\/\/github.com\/microsoft\/DeepSpeed."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356214"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.5555\/3571885.3571970"},{"key":"e_1_3_2_1_62_1","unstructured":"Martin Schulz. [n. d.]. MPIT: A New Interface for Performance Tools in MPI 3. http:\/\/cscads.rice.edu\/2010-08-cscads-mpit.pdf"},{"key":"e_1_3_2_1_63_1","volume-title":"Euro-Par 2021: Parallel Processing Workshops, Ricardo Chaves, Dora B.\u00a0Heras, Aleksandar Ilic, Didem Unat, Rosa\u00a0M. Badia, Andrea Bracciali, Patrick Diehl, Anshu Dubey, Oh\u00a0Sangyoon, Stephen L.\u00a0Scott","author":"Soyt\u00fcrk Muhammet\u00a0Abdullah","unstructured":"Muhammet\u00a0Abdullah Soyt\u00fcrk, Palwisha Akhtar, Erhan Tezcan, and Didem Unat. 2022. Monitoring Collective Communication Among GPUs. In Euro-Par 2021: Parallel Processing Workshops, Ricardo Chaves, Dora B.\u00a0Heras, Aleksandar Ilic, Didem Unat, Rosa\u00a0M. Badia, Andrea Bracciali, Patrick Diehl, Anshu Dubey, Oh\u00a0Sangyoon, Stephen L.\u00a0Scott, and Laura Ricci (Eds.). Springer International Publishing, Cham, 41\u201352."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2011.83"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607046"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2703149"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358307"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356213"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00093"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507708"}],"event":{"name":"ICS '24: 2024 International Conference on Supercomputing","location":"Kyoto Japan","acronym":"ICS '24","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 38th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650200.3656597","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3650200.3656597","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:25:36Z","timestamp":1755876336000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650200.3656597"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":70,"alternative-id":["10.1145\/3650200.3656597","10.1145\/3650200"],"URL":"https:\/\/doi.org\/10.1145\/3650200.3656597","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}