{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T23:27:53Z","timestamp":1777937273545,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100000781","name":"European Research Council","doi-asserted-by":"publisher","award":["949587"],"award-info":[{"award-number":["949587"]}],"id":[{"id":"10.13039\/501100000781","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,8]]},"DOI":"10.1145\/3721145.3730426","type":"proceedings-article","created":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:57:17Z","timestamp":1755867437000},"page":"384-396","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["A Device-Side Execution Model for Multi-GPU Task Graphs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0384-6330","authenticated-orcid":false,"given":"Ilyas","family":"Turimbetov","sequence":"first","affiliation":[{"name":"Ko\u00e7 University, Istanbul, Turkiye"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7165-2095","authenticated-orcid":false,"given":"Mohamed","family":"Wahib","sequence":"additional","affiliation":[{"name":"RIKEN Center for Computational Science, Tokyo, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2351-0770","authenticated-orcid":false,"given":"Didem","family":"Unat","sequence":"additional","affiliation":[{"name":"Ko\u00e7 University, Istanbul, Turkiye"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2017.29"},{"key":"e_1_3_3_1_3_2","unstructured":"AMD. 2023. HIP Runtime API reference: Graph Management. https:\/\/docs.amd.com\/projects\/HIP\/en\/docs-5.0.0\/doxygen\/html\/group___graph.html."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00049"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-03869-3_80"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.71"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/SCW63240.2024.00155"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Mehmet\u00a0E. Belviranli Seyong Lee Jeffrey\u00a0S. Vetter and Laxmi\u00a0N. Bhuyan. 2018. Juggler: A Dependence-Aware Task-Based Execution Framework for GPUs. SIGPLAN Not. 53 1 (feb 2018) 54\u201367.","DOI":"10.1145\/3200691.3178492"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018756"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"George Bosilca Aurelien Bouteiller Anthony Danalis Mathieu Faverge Thomas H\u00e9rault and Jack\u00a0J Dongarra. 2013. Parsec: Exploiting heterogeneity to enhance scalability. Computing in Science & Engineering 15 6 (2013) 36\u201345.","DOI":"10.1109\/MCSE.2013.98"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Tiago Carneiro\u00a0Pessoa Jan Gmys Francisco\u00a0Heron de Carvalho\u00a0J\u00fanior Nouredine Melab and Daniel Tuyttens. 2018. GPU-accelerated backtracking using CUDA Dynamic Parallelism. Concurrency and Computation: Practice and Experience 30 9 (2018) e4374.","DOI":"10.1002\/cpe.4374"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2011.50"},{"key":"e_1_3_3_1_13_2","series-title":"(SC\u201922)","volume-title":"Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","author":"Chen Yuxin","year":"2022","unstructured":"Yuxin Chen, Benjamin Brock, Serban Porumbescu, Ayd\u0131n Bulu\u00e7, Katherine Yelick, and John\u00a0D. Owens. 2022. Scalable Irregular Parallelism with GPUs: Getting CPUs out of the Way. In Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis (Dallas, Texas) (SC\u201922). IEEE, Article 50, 16\u00a0pages."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3431379.3464454"},{"key":"e_1_3_3_1_15_2","unstructured":"Francisco Corbera Andr\u00e9s Rodr\u00edguez Rafael Asenjo Angeles Navarro Antonio Vilches and Mar\u00eda\u00a0J Garzar\u00e1n. 2015. Reducing overheads of dynamic scheduling on heterogeneous chips. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1501.03336 (2015)."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/2931088.2931091"},{"key":"e_1_3_3_1_17_2","unstructured":"James\u00a0W Demmely Nicholas\u00a0J Highamz and Robert\u00a0S Schreiberx. 1992. Block LU Factorization. (1992)."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356223"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2013.66"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/InPar.2012.6339596"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","unstructured":"Julien Herrmann M.\u00a0Yusuf \u00d6zkaya Bora U\u00e7ar Kamer Kaya and \u00dcmit\u00a0V. \u00c7ataly\u00fcrek. 2019. Multilevel Algorithms for Acyclic Partitioning of Directed Acyclic Graphs. SIAM Journal on Scientific Computing (SISC) 41 4 (2019) A2117\u2013A2145. 10.1137\/18M1176865","DOI":"10.1137\/18M1176865"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCC\/SmartCity\/DSS.2018.00155"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593713"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"\u0141ukasz Jarz\u0105bek and Pawe\u0142 Czarnul. 2017. Performance evaluation of unified memory and dynamic parallelism for selected parallel CUDA applications. The Journal of Supercomputing 73 (2017) 5378\u20135401.","DOI":"10.1007\/s11227-017-2091-x"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Zhihao Jia Yongkee Kwon Galen Shipman Pat McCormick Mattan Erez and Alex Aiken. 2017. A distributed multi-GPU system for fast graph processing. Proc. VLDB Endow. 11 3 (nov 2017) 297\u2013310.","DOI":"10.14778\/3157794.3157799"},{"key":"e_1_3_3_1_26_2","unstructured":"George Karypis and Vipin Kumar. 1997. METIS: A software package for partitioning unstructured graphs partitioning meshes and computing fill-reducing orderings of sparse matrices. (1997)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3205289.3205291"},{"key":"e_1_3_3_1_28_2","unstructured":"Woosuk Kwon Gyeong-In Yu Eunji Jeong and Byung-Gon Chun. 2020. Nimble: Lightweight and parallel gpu task scheduling for deep learning. Advances in Neural Information Processing Systems 33 (2020) 8343\u20138354."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126950"},{"key":"e_1_3_3_1_30_2","unstructured":"NVIDIA. 2023. CUDA Runtime API: Graph Management. https:\/\/docs.nvidia.com\/cuda\/cuda-runtime-api\/group__CUDART__GRAPH.html."},{"key":"e_1_3_3_1_31_2","unstructured":"NVIDIA. 2023. NVIDIA GPUDirect. https:\/\/developer.nvidia.com\/gpudirect."},{"key":"e_1_3_3_1_32_2","unstructured":"NVIDIA. 2023. NVIDIA GPUDirect Storage. https:\/\/developer.nvidia.com\/gpudirect-storage."},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575748"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Markus Steinberger Bernhard Kainz Bernhard Kerbl Stefan Hauswiesner Michael Kenzel and Dieter Schmalstieg. 2012. Softshell: dynamic scheduling on GPUs. ACM Trans. on Graphics (TOG) 31 6 (2012) 1\u201311.","DOI":"10.1145\/2366145.2366180"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Markus Steinberger Michael Kenzel Pedro Boechat Bernhard Kerbl Mark Dokter and Dieter Schmalstieg. 2014. Whippletree: Task-Based Scheduling of Dynamic Workloads on the GPU. ACM Trans. Graph. 33 6 Article 228 (nov 2014) 11\u00a0pages.","DOI":"10.1145\/2661229.2661250"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3337821.3337837"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3642961.3643799"},{"key":"e_1_3_3_1_38_2","unstructured":"Stanley Tzeng Anjul Patney and John\u00a0D Owens. 2010. Task management for irregular-parallel workloads on the GPU. (2010)."},{"key":"e_1_3_3_1_39_2","unstructured":"Didem Unat Ilyas Turimbetov Mohammed Kefah\u00a0Taha Issa Do\u011fan Sa\u011fbili Flavio Vella Daniele\u00a0De Sensi and Ismayil Ismayilov. 2024. The Landscape of GPU-Centric Communication. arxiv:https:\/\/arXiv.org\/abs\/2409.09874\u00a0[cs.DC] https:\/\/arxiv.org\/abs\/2409.09874"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00075"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392742"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593705"}],"event":{"name":"ICS '25: 2025 International Conference on Supercomputing","location":"Salt Lake City USA","acronym":"ICS '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 39th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721145.3730426","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:58:17Z","timestamp":1755867497000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721145.3730426"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,8]]},"references-count":41,"alternative-id":["10.1145\/3721145.3730426","10.1145\/3721145"],"URL":"https:\/\/doi.org\/10.1145\/3721145.3730426","relation":{},"subject":[],"published":{"date-parts":[[2025,6,8]]},"assertion":[{"value":"2025-08-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}