{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T12:57:11Z","timestamp":1761397031883,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":92,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2235276","2349144","2349143","2349582","2349141"],"award-info":[{"award-number":["2235276","2349144","2349143","2349582","2349141"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1145\/3711708.3723443","type":"proceedings-article","created":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T12:33:27Z","timestamp":1746189207000},"page":"8-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["An Efficient Implementation of Parallel Breadth-first Search"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7731-6803","authenticated-orcid":false,"given":"Pao-I","family":"Chen","sequence":"first","affiliation":[{"name":"University of Wisconsin at Madison, Madison, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9768-3378","authenticated-orcid":false,"given":"Tsung-Wei","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Wisconsin at Madison, Madison, WI, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,5,2]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2025. C++ Thread. https:\/\/en.cppreference.com\/w\/cpp\/thread\/thread"},{"key":"e_1_3_2_1_2_1","unstructured":"2025. GAP Benchmark Suite. https:\/\/github.com\/sbeamer\/gapbs"},{"key":"e_1_3_2_1_3_1","unstructured":"2025. Speedcode BFS Benchmark. https:\/\/speedcode.org\/ide\/contest.html?ppopp_test_bfs_v1"},{"key":"e_1_3_2_1_4_1","unstructured":"2025. Taskflow Github. https:\/\/taskflow.github.io\/"},{"key":"e_1_3_2_1_5_1","volume-title":"High Performance Computing and Simulation (HPCS), 2010 International Conference on","author":"Agarwal Anshul","year":"2010","unstructured":"Anshul Agarwal and David A Bader. 2010. A scalable hybrid parallel breadth-first search algorithm on multicore CPU and GPU architectures. High Performance Computing and Simulation (HPCS), 2010 International Conference on (2010), 1--7."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/2388996.2389013"},{"key":"e_1_3_2_1_7_1","volume-title":"SC'11: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","author":"Buluc Aydin","year":"2011","unstructured":"Aydin Buluc and Kamesh Madduri. 2011. Parallel breadth-first search on distributed memory systems. SC'11: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis (2011), 1--12."},{"key":"e_1_3_2_1_8_1","volume-title":"Incremental Critical Path Generation for Dynamic Graphs. In IEEE Computer Society Annual Symposium on VLSI (ISVLSI).","author":"Chang Che","year":"2024","unstructured":"Che Chang, Cheng-Hsiang Chiu, Boyang Zhang, and Tsung-Wei Huang. 2024. Incremental Critical Path Generation for Dynamic Graphs. In IEEE Computer Society Annual Symposium on VLSI (ISVLSI)."},{"key":"e_1_3_2_1_9_1","volume-title":"Ink: Efficient Incremental k-Critical Path Generation","author":"Chang Che","year":"2024","unstructured":"Che Chang, Tsung-Wei Huang, Dian-Lun Lin, Guannan Guo, and Shiju Lin. 2024. Ink: Efficient Incremental k-Critical Path Generation. In ACM\/IEEE DAC."},{"key":"e_1_3_2_1_10_1","volume-title":"PathGen: An Efficient Parallel Critical Path Generation Algorithm. In IEEE\/ACM Asia and South Pacific Design Automation Conference (ASP-DAC).","author":"Chang Che","year":"2025","unstructured":"Che Chang, Boyang Zhang, Cheng-Hsiang Chiu, Dian-Lun Lin, Yi-Hua Chung, Wan-Luan Lee, Zizheng Guo, Yibo Lin, and Tsung-Wei Huang. 2025. PathGen: An Efficient Parallel Critical Path Generation Algorithm. In IEEE\/ACM Asia and South Pacific Design Automation Conference (ASP-DAC)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC58863.2023.10363426"},{"key":"e_1_3_2_1_12_1","volume-title":"GSAP: A GPU-Accelerated Stochastic Graph Partitioner. In ACM ICPP. 565--575.","author":"Chang Chih-Chun","year":"2024","unstructured":"Chih-Chun Chang, Boyang Zhang, and Tsung-Wei Huang. 2024. GSAP: A GPU-Accelerated Stochastic Graph Partitioner. In ACM ICPP. 565--575."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3502181.3533714"},{"key":"e_1_3_2_1_14_1","volume-title":"Efficient Timing Propagation with Simultaneous Structural and Pipeline Parallelisms. In ACM\/IEEE Design Automation Conference (DAC).","author":"Chiu Cheng-Hsiang","year":"2022","unstructured":"Cheng-Hsiang Chiu and Tsung-Wei Huang. 2022. Efficient Timing Propagation with Simultaneous Structural and Pipeline Parallelisms. In ACM\/IEEE Design Automation Conference (DAC)."},{"key":"e_1_3_2_1_15_1","volume-title":"An Experimental Study of Dynamic Task Graph Parallelism for Large-Scale Circuit Analysis Workloads. In IEEE Computer Society Annual Symposium on VLSI (ISVLSI).","author":"Chiu Cheng-Hsiang","year":"2024","unstructured":"Cheng-Hsiang Chiu and Tsung-Wei Huang. 2024. An Experimental Study of Dynamic Task Graph Parallelism for Large-Scale Circuit Analysis Workloads. In IEEE Computer Society Annual Symposium on VLSI (ISVLSI)."},{"key":"e_1_3_2_1_16_1","volume-title":"An Experimental Study of SYCL Task Graph Parallelism for Large-Scale Machine Learning Workloads. In International Workshop of Asynchronous Many-Task systems for Exascale (AMTE).","author":"Chiu Cheng-Hsiang","year":"2021","unstructured":"Cheng-Hsiang Chiu, Dian-Lun Lin, and Tsung-Wei Huang. 2021. An Experimental Study of SYCL Task Graph Parallelism for Large-Scale Machine Learning Workloads. In International Workshop of Asynchronous Many-Task systems for Exascale (AMTE)."},{"key":"e_1_3_2_1_17_1","volume-title":"Programming Dynamic Task Parallelism for Heterogeneous EDA Algorithms. In IEEE\/ACM International Conference on Computer-aided Design (ICCAD).","author":"Chiu Cheng-Hsiang","year":"2023","unstructured":"Cheng-Hsiang Chiu, Dian-Lun Lin, and Tsung-Wei Huang. 2023. Programming Dynamic Task Parallelism for Heterogeneous EDA Algorithms. In IEEE\/ACM International Conference on Computer-aided Design (ICCAD)."},{"key":"e_1_3_2_1_18_1","volume-title":"Reinforcement Learning-generated Topological Order for Dynamic Task Graph Scheduling. In IEEE High-performance and Extreme Computing Conference (HPEC).","author":"Chiu Cheng-Hsiang","year":"2024","unstructured":"Cheng-Hsiang Chiu, Chedi Morchdi, Yi Zhou, Boyang Zhang, Che Chang, and Tsung-Wei Huang. 2024. Reinforcement Learning-generated Topological Order for Dynamic Task Graph Scheduling. In IEEE High-performance and Extreme Computing Conference (HPEC)."},{"key":"e_1_3_2_1_19_1","unstructured":"T. H. Cormen C. E. Leiserson R. L. Rivest and C. Stein. 2009. Introduction to Algorithms (3rd ed.). MIT Press."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01386390"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW59300.2023.00150"},{"key":"e_1_3_2_1_22_1","volume-title":"ACM International Workshop on Extreme Heterogeneity Solutions (ExHET).","author":"Gener Serhan","year":"2025","unstructured":"Serhan Gener, Sahil Hassan, Liangliang Chang, Chaitali Chakrabarti, Tsung-Wei Huang, Umit Ograss,, and Ali Akoglu. 2025. A Unified Portable and Programmable Framework for Task-Based Execution and Dynamic Resource Management on Heterogeneous Systems. In ACM International Workshop on Extreme Heterogeneity Solutions (ExHET)."},{"key":"e_1_3_2_1_23_1","volume-title":"An Efficient Critical Path Generation Algorithm Considering Extensive Path Constraints. In ACM\/IEEE Design Automation Conference (DAC).","author":"Guo Guannan","year":"2020","unstructured":"Guannan Guo, Tsung-Wei Huang, Chun-Xun Lin, and Martin Wong. 2020. An Efficient Critical Path Generation Algorithm Considering Extensive Path Constraints. In ACM\/IEEE Design Automation Conference (DAC)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2023.3272274"},{"key":"e_1_3_2_1_25_1","volume-title":"GPU-accelerated Critical Path Generation with Path Constraints. In IEEE\/ACM International Conference on Computer-Aided Design (IC-CAD).","author":"Guo Guannan","year":"2021","unstructured":"Guannan Guo, Tsung-Wei Huang, Yibo Lin, and Martin Wong. 2021. GPU-accelerated Critical Path Generation with Path Constraints. In IEEE\/ACM International Conference on Computer-Aided Design (IC-CAD)."},{"key":"e_1_3_2_1_26_1","volume-title":"GPU-accelerated Path-based Timing Analysis. In IEEE\/ACM Design Automation Conference (DAC).","author":"Guo Guannan","year":"2021","unstructured":"Guannan Guo, Tsung-Wei Huang, Yibo Lin, and Martin Wong. 2021. GPU-accelerated Path-based Timing Analysis. In IEEE\/ACM Design Automation Conference (DAC)."},{"volume-title":"Automation and Test in Europe Conference (DATE).","author":"Guo Guannan","key":"e_1_3_2_1_27_1","unstructured":"Guannan Guo, Tsung-Wei Huang, and Martin D. F. Wong. 2023. Fast STA Graph Partitioning Framework for Multi-GPU Acceleration. In IEEE\/ACM Design, Automation and Test in Europe Conference (DATE)."},{"key":"e_1_3_2_1_28_1","volume-title":"IEEE\/ACM International Conference on Computer-aided Design (ICCAD).","author":"Guo Zizheng","year":"2020","unstructured":"Zizheng Guo, Tsung-Wei Huang, and Yibo Lin. 2020. A Provably Good and Practically Efficient Algorithm for Common Path Pessimism Removal in Large Designs. In IEEE\/ACM International Conference on Computer-aided Design (ICCAD)."},{"key":"e_1_3_2_1_29_1","volume-title":"GPU-accelerated Static Timing Analysis. In IEEE\/ACM International Conference on Computer-Aided Design (ICCAD).","author":"Guo Zizheng","year":"2020","unstructured":"Zizheng Guo, Tsung-Wei Huang, and Yibo Lin. 2020. GPU-accelerated Static Timing Analysis. In IEEE\/ACM International Conference on Computer-Aided Design (ICCAD)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586085"},{"key":"e_1_3_2_1_31_1","volume-title":"HeteroCPPR: Accelerating Common Path Pessimism Removal with Heterogeneous CPU-GPU Parallelism. In IEEE\/ACM International Conference on Computer-Aided Design (ICCAD).","author":"Guo Zizheng","year":"2021","unstructured":"Zizheng Guo, Tsung-Wei Huang, and Yibo Lin. 2021. HeteroCPPR: Accelerating Common Path Pessimism Removal with Heterogeneous CPU-GPU Parallelism. In IEEE\/ACM International Conference on Computer-Aided Design (ICCAD)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2023.3286261"},{"key":"e_1_3_2_1_33_1","volume-title":"Automation and Test in Europe Conference (DATE).","author":"Guo Zizheng","year":"2024","unstructured":"Zizheng Guo, Tsung-Wei Huang, Jin Zhou, Cheng Zhuo, Yibo Lin, Runsheng Wang, and Ru Huang. 2024. Heterogeneous Static Timing Analysis with Advanced Delay Calculator. In IEEE\/ACM Design, Automation and Test in Europe Conference (DATE)."},{"key":"e_1_3_2_1_34_1","volume-title":"HeteroExcept: Heterogeneous Engine for General Timing Path Exception Analysis. In IEEE\/ACM International Conference on Computer-aided Design (ICCAD).","author":"Guo Zizheng","year":"2024","unstructured":"Zizheng Guo, Zuodong Zhang, Wuxi Li, Tsung-Wei Huang, Xizhe Shi, Yufan Du, Yibo Lin, Runsheng Wang, and Ru Huang. 2024. HeteroExcept: Heterogeneous Engine for General Timing Path Exception Analysis. In IEEE\/ACM International Conference on Computer-aided Design (ICCAD)."},{"key":"e_1_3_2_1_35_1","volume-title":"International Conference on High Performance Computing","author":"Harish Pawan","year":"2007","unstructured":"Pawan Harish and P J Narayanan. 2007. Accelerating large graph algorithms on the GPU using CUDA. International Conference on High Performance Computing (2007), 197--208."},{"key":"e_1_3_2_1_36_1","volume-title":"Parallel Architectures and Compilation Techniques (PACT), 2011 International Conference on","author":"Hong Sungpack","year":"2011","unstructured":"Sungpack Hong, Teddy Oguntebi, and Kunle Olukotun. 2011. An efficient parallel graph coloring algorithm for multi-core architectures. Parallel Architectures and Compilation Techniques (PACT), 2011 International Conference on (2011), 320--330."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3400302.3415750"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ProTools54808.2021.00006"},{"key":"e_1_3_2_1_39_1","volume-title":"Enhancing the Performance Portability of Heterogeneous Circuit Analysis Programs. In IEEE High-Performance Extreme Computing Conference (HPEC).","author":"Huang Tsung-Wei","year":"2022","unstructured":"Tsung-Wei Huang. 2022. Enhancing the Performance Portability of Heterogeneous Circuit Analysis Programs. In IEEE High-Performance Extreme Computing Conference (HPEC)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00080"},{"key":"e_1_3_2_1_41_1","volume-title":"Wong","author":"Huang Tsung-Wei","year":"2021","unstructured":"Tsung-Wei Huang, Guannan Guo, Chun-Xun Lin, and Martin D. F. Wong. 2021. OpenTimer v2: A New Parallel Incremental Timing Analysis Engine. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD) (2021)."},{"key":"e_1_3_2_1_42_1","volume-title":"Task-parallel Programming with Constrained Parallelism. In IEEE High-Performance Extreme Computing Conference (HPEC).","author":"Huang Tsung-Wei","year":"2022","unstructured":"Tsung-Wei Huang and Leslie Hwang. 2022. Task-parallel Programming with Constrained Parallelism. In IEEE High-Performance Extreme Computing Conference (HPEC)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3322470"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3243654"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2019.00105"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3323477"},{"key":"e_1_3_2_1_47_1","volume-title":"DtCraft: A Distributed Execution Engine for Compute-intensive Applications. In IEEE\/ACM International Conference on Computer-aided Design (IC-CAD).","author":"Huang Tsung-Wei","year":"2017","unstructured":"Tsung-Wei Huang, Chun-Xun Lin, and Martin Wong. 2017. DtCraft: A Distributed Execution Engine for Compute-intensive Applications. In IEEE\/ACM International Conference on Computer-aided Design (IC-CAD)."},{"key":"e_1_3_2_1_48_1","volume-title":"DtCraft: A High-performance Distributed Execution Engine at Scale","author":"Huang Tsung-Wei","year":"2019","unstructured":"Tsung-Wei Huang, Chun-Xun Lin, and Martin Wong. 2019. DtCraft: A High-performance Distributed Execution Engine at Scale. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD) (2019)."},{"key":"e_1_3_2_1_49_1","volume-title":"OpenTimer v2: A Parallel Incremental Timing Analysis Engine","author":"Huang Tsung-Wei","year":"2021","unstructured":"Tsung-Wei Huang, Chun-Xun Lin, and Martin Wong. 2021. OpenTimer v2: A Parallel Incremental Timing Analysis Engine. IEEE Design and Test (DAT) (2021)."},{"key":"e_1_3_2_1_50_1","volume-title":"Taskflow: A Lightweight Parallel and Heterogeneous Task Graph Computing System","author":"Huang Tsung-Wei","year":"2022","unstructured":"Tsung-Wei Huang, Dian-Lun Lin, Chun-Xun Lin, and Yibo Lin. 2022. Taskflow: A Lightweight Parallel and Heterogeneous Task Graph Computing System. IEEE Transactions on Parallel and Distributed Systems (TPDS) (2022)."},{"key":"e_1_3_2_1_51_1","volume-title":"Taskflow: A General-purpose Parallel and Heterogeneous Task Programming System","author":"Huang Tsung-Wei","year":"2022","unstructured":"Tsung-Wei Huang, Dian-Lun Lin, Yibo Lin, and Chun-Xun Lin. 2022. Taskflow: A General-purpose Parallel and Heterogeneous Task Programming System. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD) (2022)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW55747.2022.00099"},{"key":"e_1_3_2_1_53_1","volume-title":"ACM\/IEEE Design Automation Conference (DAC). 741--746","author":"Huang Tsung-Wei","year":"2011","unstructured":"Tsung-Wei Huang, Hong-Yan Su, and Tsung-Yi Ho. 2011. Progressive network-flow based power-aware broadcast addressing for pin-constrained digital microfluidic biochips. In ACM\/IEEE Design Automation Conference (DAC). 741--746."},{"key":"e_1_3_2_1_54_1","volume-title":"OpenTimer: A High-Performance Timing Analysis Tool. In IEEE\/ACM International Conference on Computer-Aided Design (ICCAD).","author":"Huang Tsung-Wei","year":"2015","unstructured":"Tsung-Wei Huang and Martin Wong. 2015. OpenTimer: A High-Performance Timing Analysis Tool. In IEEE\/ACM International Conference on Computer-Aided Design (ICCAD)."},{"key":"e_1_3_2_1_55_1","volume-title":"An Ultra-Fast Path-Based Timing Analysis Algorithm for CPPR","author":"Huang Tsung-Wei","year":"2016","unstructured":"Tsung-Wei Huang and Martin Wong. 2016. UI-Timer 1.0: An Ultra-Fast Path-Based Timing Analysis Algorithm for CPPR. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (TCAD) (2016)."},{"volume-title":"IEEE\/ACM Design Automation Conference (DAC).","author":"Huang Tsung-Wei","key":"e_1_3_2_1_56_1","unstructured":"Tsung-Wei Huang, Martin Wong, D. Sinha, K. Kalafala, and N. Venkateswaran. 2016. A Distributed Timing Analysis Framework for Large Designs. In IEEE\/ACM Design Automation Conference (DAC)."},{"volume-title":"Fast Path-Based Timing Analysis for CPPR","author":"Huang Tsung-Wei","key":"e_1_3_2_1_57_1","unstructured":"Tsung-Wei Huang, P.-C. Wu, and Martin Wong. 2014. Fast Path-Based Timing Analysis for CPPR. In IEEE\/ACM ICCAD."},{"volume-title":"UI-Route: An Ultra-Fast Incremental Maze Routing Algorithm. In ACM System Level Interconnect Prediction Workshop (SLIP). 1--8.","author":"Huang Tsung-Wei","key":"e_1_3_2_1_58_1","unstructured":"Tsung-Wei Huang, Pei-Ci Wu, and Martin D. F. Wong. 2014. UI-Route: An Ultra-Fast Incremental Maze Routing Algorithm. In ACM System Level Interconnect Prediction Workshop (SLIP). 1--8."},{"key":"e_1_3_2_1_59_1","volume-title":"Wong","author":"Huang Tsung-Wei","year":"2014","unstructured":"Tsung-Wei Huang, Pei-Ci Wu, and Martin D. F. Wong. 2014. UI-Timer: An ultra-fast clock network pessimism removal algorithm. In IEEE\/ACM ICCAD."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626184.3635278"},{"key":"e_1_3_2_1_61_1","volume-title":"ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS).","author":"Jiang Shui","year":"2025","unstructured":"Shui Jiang, Yi-Hua Chung, Chih-Chun Chang, Tsung-Yi Ho, and Tsung-Wei Huang. 2025. BQSim: GPU-accelerated Batch Quantum Circuit Simulation using Decision Diagram. In ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)."},{"key":"e_1_3_2_1_62_1","volume-title":"GLARE: Accelerating Sparse DNN Inference Kernels with Global Memory Access Reduction. In IEEE High-performance and Extreme Computing Conference (HPEC).","author":"Jiang Shiu","year":"2023","unstructured":"Shiu Jiang, Tsung-Wei Huang, and Tsung-Yi Ho. 2023. GLARE: Accelerating Sparse DNN Inference Kernels with Global Memory Access Reduction. In IEEE High-performance and Extreme Computing Conference (HPEC)."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605573.3605625"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"crossref","unstructured":"Jiang Shui and Fu Rongliang and Burgholzer Lukas and Wille Robert and Ho Tsung-Yi and Huang Tsung-Wei. 2024. FlatDD: A High-Performance Quantum Circuit Simulator using Decision Diagram and Flat Array. In ACM ICPP. 388--399.","DOI":"10.1145\/3673038.3673073"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317744"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394885.3431578"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062274"},{"key":"e_1_3_2_1_68_1","volume-title":"ACM\/IEEE Design Automation Conference (DAC).","author":"Lee Wan-Luan","year":"2025","unstructured":"Wan-Luan Lee, Shui Jiang, Dian-Lun Lin, Che Chang, Boyang Zhang, Yi-Hua Chung, Ulf Schlichtmann, Tsung-Yi Ho,, and Tsung-Wei Huang. 2025. iG-kway: Incremental k-way Graph Partitioning on GPU. In ACM\/IEEE Design Automation Conference (DAC)."},{"key":"e_1_3_2_1_69_1","volume-title":"IEEE\/ACM Asia and South Pacific Design Automation Conference (ASP-DAC).","author":"Lee Wan-Luan","year":"2025","unstructured":"Wan-Luan Lee, Dian-Lun Lin, Cheng-Hsiang Chiu, Ulf Schlichtmann, and Tsung-Wei Huang. 2025. HyperG: Multilevel GPU-Accelerated k-way Hypergraph Partitioner. In IEEE\/ACM Asia and South Pacific Design Automation Conference (ASP-DAC)."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3656238"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350537"},{"key":"e_1_3_2_1_72_1","volume-title":"An Efficient and Composable Parallel Task Programming Library. In IEEE High-performance and Extreme Computing Conference (HPEC).","author":"Lin Chun-Xun","year":"2019","unstructured":"Chun-Xun Lin, Tsung-Wei Huang, Guannan Guo, and Martin Wong. 2019. An Efficient and Composable Parallel Task Programming Library. In IEEE High-performance and Extreme Computing Conference (HPEC)."},{"key":"e_1_3_2_1_73_1","volume-title":"An Efficient Work-Stealing Scheduler for Task Dependency Graph. In IEEE International Conference on Parallel and Distributed Systems (ICPADS).","author":"Lin Chun-Xun","year":"2020","unstructured":"Chun-Xun Lin, Tsung-Wei Huang, and Martin Wong. 2020. An Efficient Work-Stealing Scheduler for Task Dependency Graph. In IEEE International Conference on Parallel and Distributed Systems (ICPADS)."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3194554.3194560"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC43674.2020.9286218"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-85665-6_27"},{"key":"e_1_3_2_1_77_1","volume-title":"Accelerating Large Sparse Neural Network Inference using GPU Task Graph Parallelism","author":"Lin Dian-Lun","year":"2022","unstructured":"Dian-Lun Lin and Tsung-Wei Huang. 2022. Accelerating Large Sparse Neural Network Inference using GPU Task Graph Parallelism. IEEE Transactions on Parallel and Distributed Systems (TPDS) (2022)."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-69583-4_11"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3545008.3545091"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247942"},{"volume-title":"G-PASTA: GPU Accelerated Partitioning Algorithm for Static Timing Analysis","author":"Lin Shiju","key":"e_1_3_2_1_81_1","unstructured":"Shiju Lin, Guannan Guo, Tsung-Wei Huang, Weihua Sheng, Evangeline Young, and Martin Wong. 2024. G-PASTA: GPU Accelerated Partitioning Algorithm for Static Timing Analysis. In ACM\/IEEE DAC."},{"key":"e_1_3_2_1_82_1","volume-title":"IEEE\/ACM Asia and South Pacific Design Automation Conference (ASP-DAC).","author":"Morchdi Chedi","year":"2024","unstructured":"Chedi Morchdi, Cheng-Hsiang Chiu, Yi Zhou, and Tsung-Wei Huang. 2024. A Resource-efficient Task Scheduling System using Reinforcement Learning. In IEEE\/ACM Asia and South Pacific Design Automation Conference (ASP-DAC)."},{"key":"e_1_3_2_1_83_1","volume-title":"Taskflow-San: Sanitizing Erroneous Control Flow in Taskflow Programs. In IEEE Workshop on Extreme Scale Programming Models and Middleware (ESPM2).","author":"Mower McKay","year":"2021","unstructured":"McKay Mower, Luke Majors, and Tsung-Wei Huang. 2021. Taskflow-San: Sanitizing Erroneous Control Flow in Taskflow Programs. In IEEE Workshop on Extreme Scale Programming Models and Middleware (ESPM2)."},{"key":"e_1_3_2_1_84_1","volume-title":"OpenMP Application Programming Interface Version 5.2. https:\/\/www.openmp.org\/specifications\/ Accessed","author":"Architecture Review Board MP","year":"2025","unstructured":"OpenMP Architecture Review Board. 2021. OpenMP Application Programming Interface Version 5.2. https:\/\/www.openmp.org\/specifications\/ Accessed: January 27, 2025."},{"key":"e_1_3_2_1_85_1","volume-title":"Proceedings of the 9th International Symposium on Experimental Algorithms (SEA)","author":"Pearce R","year":"2010","unstructured":"R Pearce, M Gokhale, and N Amato. 2010. Multithreaded graph traversal, partitioning, and layout. Proceedings of the 9th International Symposium on Experimental Algorithms (SEA) (2010), 103--114."},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI61997.2024.00155"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2014.2331340"},{"key":"e_1_3_2_1_88_1","volume-title":"Proceedings of the ACM\/IEEE SC 2005 Conference (SC'05)","author":"Yoo Andy","year":"2005","unstructured":"Andy Yoo, Edmond Chow, Keith Henderson, Mahidhar T Rajan, and William C McLendon. 2005. A scalable distributed parallel breadth-first search algorithm on BlueGene\/L. Proceedings of the ACM\/IEEE SC 2005 Conference (SC'05) (2005), 25."},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC49654.2021.9622872"},{"key":"e_1_3_2_1_90_1","volume-title":"IEEE\/ACM Asia and South Pacific Design Automation Conference (ASP-DAC).","author":"Zhang Boyang","year":"2025","unstructured":"Boyang Zhang, Che Chang, Cheng-Hsiang Chiu, Dian-Lun Lin, Yang Sui, Chih-Chun Chang, Yi-Hua Chung, Wan-Luan Lee, Zizheng Guo, Yibo Lin, and Tsung-Wei Huang. 2025. iTAP: An Incremental Task Graph Partitioner for Task-parallel Static Timing Analysis. In IEEE\/ACM Asia and South Pacific Design Automation Conference (ASP-DAC)."},{"key":"e_1_3_2_1_91_1","volume-title":"Chih-Chun Chang, Donghao Fang, and Tsung-Wei Huang.","author":"Zhang Boyang","year":"2024","unstructured":"Boyang Zhang, Dian-Lun Lin, Che Chang, Cheng-Hsiang Chiu, Bojue Wang, Wan Luan Lee, Chih-Chun Chang, Donghao Fang, and Tsung-Wei Huang. 2024. G-PASTA: GPU Accelerated Partitioning Algorithm for Static Timing Analysis. In ACM\/IEEE DAC."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASP-DAC52403.2022.9712566"}],"event":{"name":"FCPC '25: 1st FastCode Programming Challenge","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGPLAN ACM Special Interest Group on Programming Languages"],"location":"The Westin Las Vegas Hotel & Spa Las Vegas NV USA","acronym":"FCPC '25"},"container-title":["Proceedings of the 1st FastCode Programming Challenge"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711708.3723443","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711708.3723443","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711708.3723443","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:19:15Z","timestamp":1750295955000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711708.3723443"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":92,"alternative-id":["10.1145\/3711708.3723443","10.1145\/3711708"],"URL":"https:\/\/doi.org\/10.1145\/3711708.3723443","relation":{},"subject":[],"published":{"date-parts":[[2025,3]]},"assertion":[{"value":"2025-05-02","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}