{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T15:55:45Z","timestamp":1772207745929,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,7]],"date-time":"2023-08-07T00:00:00Z","timestamp":1691366400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Key R&D Program of China","award":["2020YFB0204601"],"award-info":[{"award-number":["2020YFB0204601"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,7]]},"DOI":"10.1145\/3605573.3605587","type":"proceedings-article","created":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T16:21:16Z","timestamp":1694622076000},"page":"513-523","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["GFFT: a Task Graph Based Fast Fourier Transform Optimization Framework"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8105-147X","authenticated-orcid":false,"given":"Qinglin","family":"Lu","sequence":"first","affiliation":[{"name":"Institute of Software, Chinese Academy of Science, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7345-2003","authenticated-orcid":false,"given":"Xinyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Science, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1795-4498","authenticated-orcid":false,"given":"Wenjing","family":"Ma","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Science, China and State Key Laboratory of Computer Science, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4719-1049","authenticated-orcid":false,"given":"Yuwen","family":"Zhao","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Science, China and University of Chinese Academy of Sciences, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2823-7213","authenticated-orcid":false,"given":"Daokun","family":"Chen","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Science, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7344-7493","authenticated-orcid":false,"given":"Fangfang","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Software, Chinese Academy of Science, China and State Key Laboratory of Computer Science, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,9,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"AOCL: AMD Optimizing CPU Libraries. https:\/\/developer.amd.com\/wp-content\/resources\/57404_User_Guide_AMD_AOCL_v3.2_GA.pdf","author":"AMD.","year":"2022","unstructured":"AMD. 2022. AOCL: AMD Optimizing CPU Libraries. https:\/\/developer.amd.com\/wp-content\/resources\/57404_User_Guide_AMD_AOCL_v3.2_GA.pdf"},{"key":"e_1_3_2_1_2_1","unstructured":"AMD. 2022. clFFT: a software library containing FFT functions written in OpenCL. https:\/\/github.com\/clMathLibraries\/clFFT"},{"key":"e_1_3_2_1_3_1","unstructured":"AMD. 2022. rocFFT: a software library for computing Fast Fourier Transforms (FFT) written in HIP. https:\/\/github.com\/ROCmSoftwarePlatform\/rocFFT"},{"key":"e_1_3_2_1_4_1","unstructured":"Apple. 2022. The Apple Accelerate libraries - vDSP. https:\/\/developer.apple.com\/documentation\/accelerate\/vdsp\/fast_fourier_transforms"},{"key":"e_1_3_2_1_5_1","unstructured":"ARM. 2022. Arm Performance Libraries. https:\/\/developer.arm.com\/documentation\/101004\/2202"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11265-014-0889-9"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2013.2273199"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAU.1970.1162132"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/237502.237574"},{"key":"e_1_3_2_1_10_1","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, 2018. { TVM} : An automated { End-to-End} optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 578\u2013594."},{"key":"e_1_3_2_1_11_1","volume-title":"An algorithm for the machine calculation of complex Fourier series. Mathematics of computation 19, 90","author":"Cooley W","year":"1965","unstructured":"James\u00a0W Cooley and John\u00a0W Tukey. 1965. An algorithm for the machine calculation of complex Fourier series. Mathematics of computation 19, 90 (1965), 297\u2013301."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Leonardo Dagum and Ramesh Menon. 1998. OpenMP: an industry standard API for shared-memory programming. IEEE computational science and engineering 5 1 (1998) 46\u201355.","DOI":"10.1109\/99.660313"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2018.2873289"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1998.681704"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840301"},{"key":"e_1_3_2_1_17_1","series-title":"Jan. 2017","volume-title":"fftw. org\/benchfft\/","author":"Frigo Matteo","year":"2019","unstructured":"Matteo Frigo and Steven\u00a0G Johnson. 2019. BenchFFT. Online] http:\/\/www. fftw. org\/benchfft\/(Jan. 2017) (2019)."},{"key":"e_1_3_2_1_18_1","unstructured":"Matteo Frigo and Stefan Kral. 2001. The advanced fft program generator genfft. (2001)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.2517-6161.1958.tb00300.x"},{"key":"e_1_3_2_1_20_1","first-page":"27","article-title":"Optimizing DNN computation with relaxed graph substitutions","volume":"1","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, James Thomas, Todd Warszawski, Mingyu Gao, Matei Zaharia, and Alex Aiken. 2019. Optimizing DNN computation with relaxed graph substitutions. Proceedings of Machine Learning and Systems 1 (2019), 27\u201339.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_21_1","first-page":"1","article-title":"Mnn: A universal and efficient inference engine","volume":"2","author":"Jiang Xiaotang","year":"2020","unstructured":"Xiaotang Jiang, Huan Wang, Yiliu Chen, Ziqi Wu, Lichuan Wang, Bin Zou, Yafeng Yang, Zongyang Cui, Yu Cai, Tianhang Yu, 2020. Mnn: A universal and efficient inference engine. Proceedings of Machine Learning and Systems 2 (2020), 1\u201313.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24650-3_11"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356138"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45545-0_17"},{"key":"e_1_3_2_1_25_1","unstructured":"Nvidia. 2022. CUDA Fast Fourier Transform library. https:\/\/docs.nvidia.com\/cuda\/cufft\/index.html"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840306"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1968.6477"},{"key":"e_1_3_2_1_28_1","volume-title":"Parallel computations","author":"Swarztrauber N","unstructured":"Paul\u00a0N Swarztrauber. 1982. Vectorizing the ffts. In Parallel computations. Elsevier, 51\u201383."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-8191(84)90413-7"},{"key":"e_1_3_2_1_30_1","volume-title":"Fast Fourier transform algorithms for parallel computers","author":"Takahashi Daisuke","unstructured":"Daisuke Takahashi. 2019. Fast Fourier transform algorithms for parallel computers. Springer."},{"key":"e_1_3_2_1_31_1","volume-title":"High-Performance Computing on the Intel\u00ae Xeon Phi\u2122","author":"Wang Endong","unstructured":"Endong Wang, Qing Zhang, Bo Shen, Guangyong Zhang, Xiaowei Lu, Qing Wu, and Yajuan Wang. 2014. Intel math kernel library. In High-Performance Computing on the Intel\u00ae Xeon Phi\u2122. Springer, 167\u2013188."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/MDAT.2020.2968258"}],"event":{"name":"ICPP 2023: 52nd International Conference on Parallel Processing","location":"Salt Lake City UT USA","acronym":"ICPP 2023"},"container-title":["Proceedings of the 52nd International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605573.3605587","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3605573.3605587","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:04Z","timestamp":1750182544000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3605573.3605587"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,7]]},"references-count":31,"alternative-id":["10.1145\/3605573.3605587","10.1145\/3605573"],"URL":"https:\/\/doi.org\/10.1145\/3605573.3605587","relation":{},"subject":[],"published":{"date-parts":[[2023,8,7]]},"assertion":[{"value":"2023-09-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}