{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:50:07Z","timestamp":1771699807615,"version":"3.50.1"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030856649","type":"print"},{"value":"9783030856656","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-85665-6_27","type":"book-chapter","created":{"date-parts":[[2021,8,28]],"date-time":"2021-08-28T03:06:52Z","timestamp":1630120012000},"page":"435-450","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":38,"title":["Efficient GPU Computation Using Task Graph Parallelism"],"prefix":"10.1007","author":[{"given":"Dian-Lun","family":"Lin","sequence":"first","affiliation":[]},{"given":"Tsung-Wei","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,8,25]]},"reference":[{"key":"27_CR1","unstructured":"NVIDIA CUDA graph example. https:\/\/github.com\/NVIDIA\/cuda-samples\/blob\/master\/Samples\/simpleCudaGraphs\/simpleCudaGraphs.cu"},{"key":"27_CR2","unstructured":"OpenMP. https:\/\/www.openmp.org"},{"key":"27_CR3","unstructured":"Taskflow. https:\/\/taskflow.github.io"},{"key":"27_CR4","unstructured":"Cuda graph in tensorflow. In: NVIDIA GPU Technology Conference (GTC) (2021). https:\/\/www.nvidia.com\/en-us\/on-demand\/session\/gtcspring21-s31312\/"},{"key":"27_CR5","unstructured":"Effortless CUDA graphs. In: NVIDIA GPU Technology Conference (GTC) (2021). https:\/\/www.nvidia.com\/en-us\/on-demand\/session\/gtcspring21-s32082\/"},{"issue":"2","key":"27_CR6","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1137\/0201008","volume":"1","author":"AV Aho","year":"1972","unstructured":"Aho, A.V., Garey, M.R., Ullman, J.D.: The transitive reduction of a directed graph. SIAM J. Comput. 1(2), 131\u2013137 (1972)","journal-title":"SIAM J. Comput."},{"issue":"2","key":"27_CR7","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1002\/cpe.1631","volume":"23","author":"C Augonnet","year":"2011","unstructured":"Augonnet, C., Thibault, S., Namyst, R., Wacrenier, P.A.: StarPU: a unified platform for task scheduling on heterogeneous multicore architectures. Concurr. Comput. Pract. Exp. 23(2), 187\u2013198 (2011)","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Bauer, M., Treichler, S., Slaughter, E., Aiken, A.: Legion: expressing locality and independence with logical regions. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC 2012, pp. 1\u201311. IEEE (2012)","DOI":"10.1109\/SC.2012.71"},{"issue":"3","key":"27_CR9","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1007\/s11265-017-1262-6","volume":"89","author":"T Blattner","year":"2017","unstructured":"Blattner, T., Keyrouz, W., Bhattacharyya, S.S., Halem, M., Brady, M.: A hybrid task graph scheduler for high performance image processing workflows. J. Sig. Process. Syst. 89(3), 457\u2013467 (2017)","journal-title":"J. Sig. Process. Syst."},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Bosilca, G., Bouteiller, A., Danalis, A., Herault, T., Lemariner, P., Dongarra, J.: DAGuE: a generic distributed DAG engine for high performance computing. In: 2011 IEEE International Symposium on Parallel and Distributed Processing Workshops and Phd Forum, Anchorage, Alaska, USA, pp. 1151\u20131158. IEEE (2011)","DOI":"10.1109\/IPDPS.2011.281"},{"key":"27_CR11","doi-asserted-by":"publisher","unstructured":"Edwards, H.C., Trott, C.R., Sunderland, D.: Kokkos: enabling manycore performance portability through polymorphic memory access patterns. J. Parallel Distrib. Comput. 74(12), 3202\u20133216 (2014). Domain-Specific Languages and High-Level Frameworks for High-Performance Computing. https:\/\/doi.org\/10.1016\/j.jpdc.2014.07.003. http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0743731514001257","DOI":"10.1016\/j.jpdc.2014.07.003"},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Guo, G., Huang, T.W., Lin, Y., Wong, M.: GPU-accelerated Pash-based timing analysis. In: ACM\/IEEE Design Automation Conference (DAC) (2021)","DOI":"10.1109\/DAC18074.2021.9586316"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Guo, Z., Huang, T.W., Lin, Y.: GPU-accelerated static timing analysis. In: IEEE\/ACM International Conference on Computer-Aided Design (ICCAD), pp. 1\u20138 (2020)","DOI":"10.1145\/3400302.3415631"},{"issue":"1\u20133","key":"27_CR14","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1016\/0012-365X(93)90164-O","volume":"111","author":"M Habib","year":"1993","unstructured":"Habib, M., Morvan, M., Rampon, J.X.: On the calculation of transitive reduction-closure of orders. Discret. Math. 111(1\u20133), 289\u2013303 (1993)","journal-title":"Discret. Math."},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Huang, T.W.: A general-purpose parallel and heterogeneous task programming system for VLSI CAD. In: IEEE\/ACM International Conference on Computer-aided Design (ICCAD) (2020)","DOI":"10.1145\/3400302.3415750"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Huang, T.W., Lin, C.X., Guo, G., Wong, M.: Cpp-taskflow: fast task-based parallel programming using modern c++. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 974\u2013983. IEEE (2019)","DOI":"10.1109\/IPDPS.2019.00105"},{"issue":"4","key":"27_CR17","doi-asserted-by":"publisher","first-page":"776","DOI":"10.1109\/TCAD.2020.3007319","volume":"40","author":"TW Huang","year":"2021","unstructured":"Huang, T.W., Lin, C.X., Wong, M.D.F.: OpenTimer v2: a parallel incremental timing analysis engine. IEEE Trans. Comput. Aided Des. Integr. Circ. Syst. (TCAD) 40(4), 776\u2013789 (2021)","journal-title":"IEEE Trans. Comput. Aided Des. Integr. Circ. Syst. (TCAD)"},{"issue":"2","key":"27_CR18","doi-asserted-by":"publisher","first-page":"62","DOI":"10.1109\/MDAT.2021.3049177","volume":"38","author":"TW Huang","year":"2021","unstructured":"Huang, T.W., Lin, C.X., Wong, M.D.F.: OpenTimer v2: a parallel incremental timing analysis engine. IEEE Des. Test 38(2), 62\u201368 (2021)","journal-title":"IEEE Des. Test"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Huang, T.W., Lin, D.L., Lin, Y., Lin, C.X.: Taskflow: a general-purpose parallel and heterogeneous task programming system. IEEE Trans. Comput. Aided Des. Integr. Circ. Syst. (TCAD) (2021)","DOI":"10.1109\/TCAD.2021.3082507"},{"key":"27_CR20","doi-asserted-by":"publisher","first-page":"1687","DOI":"10.1109\/TCAD.2020.3025075","volume":"40","author":"TW Huang","year":"2021","unstructured":"Huang, T.W., Lin, Y., Lin, C.X., Guo, G., Wong, M.D.F.: Cpp-Taskflow: a general-purpose parallel task programming system at scale. IEEE Trans. Comput. Aided Des. Integr. Circ. Syst. (TCAD) 40, 1687\u20131700 (2021)","journal-title":"IEEE Trans. Comput. Aided Des. Integr. Circ. Syst. (TCAD)"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Huang, T.W., Wong, M.: OpenTimer: a high-performance timing analysis tool. In: IEEE\/ACM International Conference on Computer-Aided Design (ICCAD), pp. 895\u2013902 (2015)","DOI":"10.1109\/ICCAD.2015.7372666"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Lin, C.X., Huang, T.W., Guo, G., Wong, M.: An efficient and composable parallel task programming library. In: IEEE High Performance Extreme Computing (HPEC), pp. 1\u20137 (2019)","DOI":"10.1109\/HPEC.2019.8916447"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Lin, C.X., Huang, T.W., Guo, G., Wong, M.D.F.: A modern c++ parallel task programming library. In: ACM Multimedia Conference, pp. 2284\u20132287 (2019)","DOI":"10.1145\/3343031.3350537"},{"key":"27_CR24","doi-asserted-by":"crossref","unstructured":"Lin, D.L., Huang, T.W.: A novel inference algorithm for large sparse neural network using task graph parallelism. In: IEEE High Performance Extreme Computing Conference (HPEC), pp. 1\u20137. IEEE (2020)","DOI":"10.1109\/HPEC43674.2020.9286218"},{"key":"27_CR25","doi-asserted-by":"publisher","unstructured":"Qiao, B., Akif \u00d6zkan, M., Teich, J., Hannig, F.: The best of both worlds: combining CUDA graph with an image processing DSL. In: 2020 57th ACM\/IEEE Design Automation Conference (DAC), pp. 1\u20136 (2020). https:\/\/doi.org\/10.1109\/DAC18072.2020.9218531","DOI":"10.1109\/DAC18072.2020.9218531"},{"issue":"1\u20133","key":"27_CR26","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1016\/0304-3975(88)90032-1","volume":"58","author":"K Simon","year":"1988","unstructured":"Simon, K.: An improved algorithm for transitive closure on acyclic digraphs. Theoret. Comput. Sci. 58(1\u20133), 325\u2013346 (1988)","journal-title":"Theoret. Comput. Sci."},{"key":"27_CR27","doi-asserted-by":"crossref","unstructured":"Valdes, J., Tarjan, R.E., Lawler, E.L.: The recognition of series parallel digraphs. In: Proceedings of the 11th Annual ACM Symposium on Theory of Computing, pp. 1\u201312 (1979)","DOI":"10.1145\/800135.804393"},{"key":"27_CR28","doi-asserted-by":"publisher","unstructured":"Yu, C., Royuela, S., Qui\u00f1ones, E.: OpenMP to CUDA graphs: a compiler-based transformation to enhance the programmability of NVIDIA devices.. In: Proceedings of the 23th International Workshop on Software and Compilers for Embedded Systems, SCOPES 2020, New York, NY, USA, pp. 42\u201347. Association for Computing Machinery (2020). https:\/\/doi.org\/10.1145\/3378678.3391881","DOI":"10.1145\/3378678.3391881"},{"issue":"2","key":"27_CR29","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1007\/s00778-018-0495-8","volume":"27","author":"J Zhou","year":"2018","unstructured":"Zhou, J., Yu, J.X., Li, N., Wei, H., Chen, Z., Tang, X.: Accelerating reachability query processing based on DAG reduction. VLDB J. 27(2), 271\u2013296 (2018)","journal-title":"VLDB J."},{"key":"27_CR30","doi-asserted-by":"crossref","unstructured":"Zhou, J., Zhou, S., Yu, J.X., Wei, H., Chen, Z., Tang, X.: DAG reduction: fast answering reachability queries. In: Proceedings of the 2017 ACM International Conference on Management of Data, pp. 375\u2013390 (2017)","DOI":"10.1145\/3035918.3035927"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2021: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-85665-6_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T22:03:04Z","timestamp":1756332184000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-85665-6_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030856649","9783030856656"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-85665-6_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"25 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lisbon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2021.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"136","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"38","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}