{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T02:40:55Z","timestamp":1755830455460,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,30]],"date-time":"2025-03-30T00:00:00Z","timestamp":1743292800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["CCF-221696"],"award-info":[{"award-number":["CCF-221696"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"DOE U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-AC02- 05CH11231"],"award-info":[{"award-number":["DE-AC02- 05CH11231"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,30]]},"DOI":"10.1145\/3669940.3707237","type":"proceedings-article","created":{"date-parts":[[2025,2,6]],"date-time":"2025-02-06T12:28:01Z","timestamp":1738844881000},"page":"84-99","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Automatic Tracing in Task-Based Runtime Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0746-066X","authenticated-orcid":false,"given":"Rohan","family":"Yadav","sequence":"first","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8928-3032","authenticated-orcid":false,"given":"Michael","family":"Bauer","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8457-4105","authenticated-orcid":false,"given":"David","family":"Broman","sequence":"additional","affiliation":[{"name":"KTH Royal Institute of Technology, Stockholm, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6093-7602","authenticated-orcid":false,"given":"Michael","family":"Garland","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3723-9555","authenticated-orcid":false,"given":"Alex","family":"Aiken","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2267-903X","authenticated-orcid":false,"given":"Fredrik","family":"Kjolstad","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,3,30]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. CANDLE | Exascale Deep Learning and Simulation Enabled Precision Medicine for Cancer - wordpress.cels.anl.gov. https:\/\/wordpress.cels.anl.gov\/candle\/. [Accessed 06-05--2024]."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2766064"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.1631"},{"key":"e_1_3_2_1_4_1","unstructured":"Ray Authors. 2024. Ray Compiled Graph Documentation. Technical Report. AnyScale. https:\/\/docs.ray.io\/en\/latest\/ray-core\/ray-dag.html"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.21105\/jose.00021"},{"key":"e_1_3_2_1_6_1","volume-title":"Chandramohan A. Thekkath, and YonghuiWu.","author":"Barham Paul","year":"2022","unstructured":"Paul Barham, Aakanksha Chowdhery, Jeff Dean, Sanjay Ghemawat, Steven Hand, Dan Hurt, Michael Isard, Hyeontaek Lim, Ruoming Pang, Sudip Roy, Brennan Saeta, Parker Schuh, Ryan Sepassi, Laurent El Shafey, Chandramohan A. Thekkath, and YonghuiWu. 2022. Pathways: Asynchronous Distributed Dataflow for ML. arXiv:2203.12533 [cs.DC]"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356175"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437801.3441587"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572848.3577515"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.71"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.25344\/S4QP4W"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","unstructured":"George Bosilca Aurelien Bouteiller Anthony Danalis Thomas Herault Pierre Lemarinier and Jack Dongarra. 2011. DAGuE: A Generic Distributed DAG Engine for High Performance Computing. In 2011 IEEE Int'l Symposium on Parallel and Distributed Processing Workshops and Phd Forum. 1151--1158. https:\/\/doi.org\/10.1109\/IPDPS.2011.281","DOI":"10.1109\/IPDPS.2011.281"},{"key":"e_1_3_2_1_13_1","unstructured":"Pi-Yueh Chuang. 2021. TorchSWE: GPU shallow-water equation solver."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1186\/1471--2105--8-S7-S21"},{"key":"e_1_3_2_1_15_1","unstructured":"Dask. 2024. Dask Computation Stages. Technical Report. Dask. https:\/\/docs.dask.org\/en\/stable\/phases-of-computation.html"},{"key":"e_1_3_2_1_16_1","volume-title":"Dask: Library for dynamic task scheduling","author":"Team Dask Development","year":"2016","unstructured":"Dask Development Team. 2016. Dask: Library for dynamic task scheduling. http:\/\/dask.pydata.org"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2020.107262"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1542476.1542528"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1543135.1542528"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/233561.233562"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3148226.3148233"},{"key":"e_1_3_2_1_22_1","volume-title":"Beyond Data and Model Parallelism for Deep Neural Networks. CoRR abs\/1807.05358","author":"Jia Zhihao","year":"2018","unstructured":"Zhihao Jia, Matei Zaharia, and Alex Aiken. 2018. Beyond Data and Model Parallelism for Deep Neural Networks. CoRR abs\/1807.05358 (2018). arXiv:1807.05358 http:\/\/arxiv.org\/abs\/1807.05358"},{"volume-title":"Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its Applications","author":"Kasai Toru","key":"e_1_3_2_1_23_1","unstructured":"Toru Kasai, Gunho Lee, Hiroki Arimura, Setsuo Arikawa, and Kunsoo Park. 2001. Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its Applications. In Combinatorial Pattern Matching, Amihood Amir (Ed.). Springer Berlin Heidelberg, Berlin, Heidelberg, 181--192."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00037"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the 2017 USENIX Conference on Usenix Annual Technical Conference (Santa Clara, CA, USA) (USENIX ATC '17). USENIX Association, USA, 513--526","author":"Mashayekhi Omid","year":"2017","unstructured":"Omid Mashayekhi, Hang Qu, Chinmayee Shah, and Philip Levis. 2017. Execution templates: caching control plane decisions for strong scaling of data analytics. In Proceedings of the 2017 USENIX Conference on Usenix Annual Technical Conference (Santa Clara, CA, USA) (USENIX ATC '17). USENIX Association, USA, 513--526."},{"key":"e_1_3_2_1_26_1","volume-title":"Ray: A Distributed Framework for Emerging AI Applications. CoRR abs\/1712.05889","author":"Moritz Philipp","year":"2017","unstructured":"Philipp Moritz, Robert Nishihara, Stephanie Wang, Alexey Tumanov, Richard Liaw, Eric Liang, William Paul, Michael I. Jordan, and Ion Stoica. 2017. Ray: A Distributed Framework for Emerging AI Applications. CoRR abs\/1712.05889 (2017). arXiv:1712.05889 http:\/\/arxiv.org\/abs\/1712.05889"},{"key":"e_1_3_2_1_27_1","unstructured":"NVIDIA. 2024. CUDA Graph Documentation. Technical Report. NVIDIA. https:\/\/docs.nvidia.com\/cuda\/cuda-runtime-api\/group__CUDART__GRAPH.html"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 2001 Symposium on JavaTM Virtual Machine Research and Technology Symposium -","volume":"1","author":"Paleczny Michael","year":"2001","unstructured":"Michael Paleczny, Christopher Vick, and Cliff Click. 2001. The java hotspotTM server compiler. In Proceedings of the 2001 Symposium on JavaTM Virtual Machine Research and Technology Symposium - Volume 1 (Monterey, California) (JVM'01). USENIX Association, USA, 1."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2858788.2688515"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.30"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1996.566447"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/872757.872770"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3591237"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","unstructured":"Elliott Slaughter Wei Wu Yuankun Fu Legend Brandenburg Nicolai Garcia Wilhem Kautz Emily Marx Kaleb S. Morris Qinglei Cao George Bosilca Seema Mirchandaney Wonchan Leek Sean Treichlerk Patrick McCormick and Alex Aiken. 2020. Task Bench: A Parameterized Benchmark for Evaluating Parallel Runtime Performance. In SC20: Int'l Conference for High Performance Computing Networking Storage and Analysis. 1--15. https:\/\/doi.org\/10.1109\/SC41405.2020.00066","DOI":"10.1109\/SC41405.2020.00066"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/322344.322346"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0304--3975(01)00121--9"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1201\/b21930--12"},{"key":"e_1_3_2_1_38_1","volume-title":"Unity: Accelerating DNN Training Through Joint Optimization of Algebraic Transformations and Parallelization. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Unger Colin","year":"2022","unstructured":"Colin Unger, Zhihao Jia, Wei Wu, Sina Lin, Mandeep Baines, Carlos Efrain Quintero Narvaez, Vinay Ramakrishnaiah, Nirmal Prajapati, Pat McCormick, Jamaludin Mohd-Yusof, Xi Luo, Dheevatsa Mudigere, Jongsoo Park, Misha Smelyanskiy, and Alex Aiken. 2022. Unity: Accelerating DNN Training Through Joint Optimization of Algebraic Transformations and Parallelization. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, 267--284. https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/unger"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.1984.1659158"},{"volume-title":"The Free Encyclopedia","key":"e_1_3_2_1_40_1","unstructured":"Wikipedia. 2024. Ruler function - Wikipedia, The Free Encyclopedia. http:\/\/en.wikipedia.org\/w\/index.php?title=Ruler%20function&oldid=1193825609. [Online; accessed 02-May-2024]."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607033"},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the 9th USENIX Conference on Networked Systems Design and Implementation","author":"Zaharia Matei","year":"2012","unstructured":"Matei Zaharia, Mosharaf Chowdhury, Tathagata Das, Ankur Dave, Justin Ma, Murphy McCauley, Michael J. Franklin, Scott Shenker, and Ion Stoica. 2012. Resilient distributed datasets: a fault-tolerant abstraction for in-memory cluster computing. In Proceedings of the 9th USENIX Conference on Networked Systems Design and Implementation (San Jose, CA) (NSDI'12). USENIX Association, USA, 2."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/1133981.1134012"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1977.1055714"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1978.1055934"}],"event":{"name":"ASPLOS '25: 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Rotterdam Netherlands","acronym":"ASPLOS '25"},"container-title":["Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707237","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3669940.3707237","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T14:49:05Z","timestamp":1755787745000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3669940.3707237"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,30]]},"references-count":45,"alternative-id":["10.1145\/3669940.3707237","10.1145\/3669940"],"URL":"https:\/\/doi.org\/10.1145\/3669940.3707237","relation":{},"subject":[],"published":{"date-parts":[[2025,3,30]]},"assertion":[{"value":"2025-03-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}