{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T04:23:34Z","timestamp":1747196614357,"version":"3.40.5"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2014,11,1]],"date-time":"2014-11-01T00:00:00Z","timestamp":1414800000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. Comput. Sci. Technol."],"published-print":{"date-parts":[[2014,11]]},"DOI":"10.1007\/s11390-014-1484-z","type":"journal-article","created":{"date-parts":[[2014,11,21]],"date-time":"2014-11-21T16:19:40Z","timestamp":1416586780000},"page":"989-1002","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Memory Efficient Two-Pass 3D FFT Algorithm for Intel\u00ae Xeon PhiTM Coprocessor"],"prefix":"10.1007","volume":"29","author":[{"given":"Yi-Qun","family":"Liu","sequence":"first","affiliation":[]},{"given":"Yan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yun-Quan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xian-Yi","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,11,17]]},"reference":[{"key":"1484_CR1","unstructured":"Tessendorf J. Simulating ocean water. In SIGGRAPH 2001 Course Notes, http:\/\/people.clemson.edu\/~jtessen\/reports.html , Oct. 2014."},{"key":"1484_CR2","doi-asserted-by":"crossref","unstructured":"Ohno Y, Nishibori E, Narumi T, Koishi T, Tahirov T H, Ago H, Miyano M, Himeno R, Ebisuzaki T, Sakata M, Taiji M. A 281 Tflops calculation for X-ray protein structure analysis with special-purpose computers MDGRAPE-3. In Proc. SC, Nov. 2007, Article No. 56","DOI":"10.1145\/1362622.1362698"},{"key":"1484_CR3","first-page":"1111","volume":"12","author":"L Omlor","year":"2011","unstructured":"Omlor L, Giese M A. Anechoic blind source separation using wigner marginals. The Journal of Machine Learning Research, 2011, 12: 1111\u20131148.","journal-title":"The Journal of Machine Learning Research"},{"key":"1484_CR4","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1090\/S0025-5718-1965-0178586-1","volume":"19","author":"JW Cooley","year":"1965","unstructured":"Cooley J W, Tukey J W. An algorithm for the machine calculation of complex Fourier series. Mathematics of Computation, 1965, 19: 297\u2013301.","journal-title":"Mathematics of Computation"},{"key":"1484_CR5","unstructured":"Good I J. The interaction algorithm and practical Fourier analysis. Journal of the Royal Statistical Society. Series B (Methodological), 1958, 20(2): 361\u2013372."},{"key":"1484_CR6","unstructured":"Thomas L H. Using a computer to solve problems in physics. Applications of Digital Computers, 1963: 44\u201345."},{"key":"1484_CR7","doi-asserted-by":"crossref","unstructured":"Yavne R. An economical method for calculating the discrete Fourier transform. In Proc. AFIPS Fall Joint Comput. Conf., Dec. 1968, pp.115\u2013125.","DOI":"10.1145\/1476589.1476610"},{"issue":"6","key":"1484_CR8","doi-asserted-by":"crossref","first-page":"1107","DOI":"10.1109\/PROC.1968.6477","volume":"56","author":"CM Rader","year":"1968","unstructured":"Rader C M. Discrete Fourier transforms when the number of data samples is prime. Proceedings of the IEEE, 1968, 56(6): 1107\u20131108.","journal-title":"Proceedings of the IEEE"},{"issue":"2","key":"1484_CR9","doi-asserted-by":"crossref","first-page":"216","DOI":"10.1109\/JPROC.2004.840301","volume":"93","author":"M Frigo","year":"2005","unstructured":"Frigo M, Johnson S G. The design and implementation of FFTW3. Proceedings of the IEEE, 2005, 93(2): 216\u2013231.","journal-title":"Proceedings of the IEEE"},{"key":"1484_CR10","doi-asserted-by":"crossref","unstructured":"Ali A, Johnsson L, Subhlok J. Scheduling FFT computation on SMP and multicore systems. In Proc. the 21st ICS, Jun. 2007, pp.293\u2013301.","DOI":"10.1145\/1274971.1275011"},{"issue":"1","key":"1484_CR11","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1007\/s11390-013-1314-8","volume":"28","author":"Y Li","year":"2013","unstructured":"Li Y, Zhang Y Q, Liu Y Q, Long G P, Jia H P. MPFFT: An auto-tuning FFT library for OpenCL GPUs. Journal of Computer Science and Technology, 2013, 28(1): 90\u2013105.","journal-title":"Journal of Computer Science and Technology"},{"key":"1484_CR12","doi-asserted-by":"crossref","unstructured":"Nukada A, Matsuoka S. Auto-tuning 3-D FFT library for CUDA GPUs. In Proc. SC, Nov. 2009, Article No. 30.","DOI":"10.1145\/1654059.1654090"},{"key":"1484_CR13","doi-asserted-by":"crossref","unstructured":"Ramos S, Hoefler T. Modeling communication in cache-coherent SMP systems | A case-study with Xeon Phi. In Proc. the 22nd HPDC, Jun. 2013, pp. 97\u2013108.","DOI":"10.1145\/2493123.2462916"},{"key":"1484_CR14","doi-asserted-by":"crossref","DOI":"10.1137\/1.9781611970999","volume-title":"Computational Frameworks for the Fast Fourier Transform","author":"C Loan Van","year":"1992","unstructured":"Van Loan C. Computational Frameworks for the Fast Fourier Transform. Philadelphia USA: SIAM, 1992."},{"key":"1484_CR15","doi-asserted-by":"crossref","unstructured":"Takahashi D. A blocking algorithm for FFT on cache-based processors. In Proc. the 9th HPCN, Jun. 2001, pp.551\u2013554.","DOI":"10.1007\/3-540-48228-8_58"},{"key":"1484_CR16","doi-asserted-by":"crossref","unstructured":"Takahashi D. Implementation and evaluation of parallel FFT using SIMD instructions on multi-core processors. In Proc. IWIA, Jan. 2007, pp.53\u201359.","DOI":"10.1109\/IWIA.2007.16"},{"key":"1484_CR17","doi-asserted-by":"crossref","unstructured":"Frigo M, Leiserson C E, Prokop H, Ramachandran S. Cache-oblivious algorithms. In Proc. the 40th FOCS, Oct. 1999, pp.285\u2013297.","DOI":"10.1109\/SFFCS.1999.814600"},{"key":"1484_CR18","doi-asserted-by":"crossref","unstructured":"Gu L, Li X, Siegel J. An empirically tuned 2D and 3D FFT library on CUDA GPU. In Proc. the 24th ICS, Jun. 2010, pp.305\u2013314.","DOI":"10.1145\/1810085.1810127"},{"key":"1484_CR19","doi-asserted-by":"crossref","unstructured":"Nukada A, Ogata Y, Endo T, Matsuoka S. Bandwidth intensive 3-D FFT kernel for GPUs using CUDA. In Proc. SC, Nov. 2008, Article No. 5.","DOI":"10.1109\/SC.2008.5213210"},{"key":"1484_CR20","doi-asserted-by":"crossref","unstructured":"Dotsenko Y, Baghsorkhi S S, Lloyd B, Govindaraju N K. Auto-tuning of fast Fourier transform on graphics processors. In Proc. the 16th PPoPP, Feb. 2011, pp.257\u2013266.","DOI":"10.1145\/1941553.1941589"},{"issue":"2","key":"1484_CR21","doi-asserted-by":"crossref","first-page":"232","DOI":"10.1109\/JPROC.2004.840306","volume":"93","author":"M Pjschel","year":"2005","unstructured":"Pjschel M, Moura J M F, Johnson J R, Padua D, Veloso M, Singer B W, Xiong J, Franchetti F, Gacic A, Voronenko Y, Chen K, Johnson R W, Rizzolo N. SPIRAL: Code generation for DSP transforms. Proceedings of the IEEE, 2005, 93(2): 232\u2013275.","journal-title":"Proceedings of the IEEE"},{"key":"1484_CR22","doi-asserted-by":"crossref","unstructured":"Caballero D, Duran A, Martorell X. An OpenMP* barrier using SIMD instructions for Intel\u00ae Xeon PhiTM coprocessor. In Proc. the 9th IWOMP, Sept. 2013, pp.99\u2013113.","DOI":"10.1007\/978-3-642-40698-0_8"},{"key":"1484_CR23","doi-asserted-by":"crossref","unstructured":"Krishnaiyer R, Kultursay E, Chawla P, Preis S, Zvezdin A, Saito H. Compiler-based data prefetching and streaming non-temporal store generation for the Intel\u00ae Xeon PhiTM coprocessor. In Proc. the 27th IPDPSW, May 2013, pp.1575\u20131586.","DOI":"10.1109\/IPDPSW.2013.231"},{"issue":"6","key":"1484_CR24","doi-asserted-by":"crossref","first-page":"90","DOI":"10.1109\/MSP.2009.934155","volume":"26","author":"F Franchetti","year":"2009","unstructured":"Franchetti F, Puschel M, Voronenko Y, Chellappa S, Moura J M. Discrete Fourier transform on multicore. IEEE Signal Processing Magazine, 2009, 26(6): 90\u2013102.","journal-title":"IEEE Signal Processing Magazine"},{"key":"1484_CR25","doi-asserted-by":"crossref","unstructured":"Chen L, Hu Z, Lin J, Gao G R. Optimizing the fast Fourier transform on a multi-core architecture. In Proc. the 21st IPDPS, Mar. 2007.","DOI":"10.1109\/IPDPS.2007.370639"},{"key":"1484_CR26","doi-asserted-by":"crossref","unstructured":"Chen L, Gao G R. Performance analysis of Cooley-Tukey FFT algorithms for a many-core architecture. In Proc. SpringSim, Apr. 2010, Article No. 81.","DOI":"10.1145\/1878537.1878622"},{"key":"1484_CR27","unstructured":"Almaless G, Wajsburt F. Does shared-memory, highly multi-threaded, single-application scale on many-cores? In Proc. the 4th HotPar, Jun. 2012."},{"key":"1484_CR28","doi-asserted-by":"crossref","unstructured":"Heinecke A, Vaidyanathan K, Smelyanskiy M, Kobotov A, Dubtsov A, Henry G, Shet A G, Chrysos G, Dubey P. Design and implementation of the Linpack benchmark for single and multi-node systems based on Intel\u00ae Xeon PhiTM coprocessor. In Proc. the 27th IPDPS, May 2013, pp.126\u2013137.","DOI":"10.1109\/IPDPS.2013.113"},{"key":"1484_CR29","doi-asserted-by":"crossref","unstructured":"Liu X, Smelyanskiy M, Chow E, Dubey P. E\u00b1cient sparse matrix\u2013vector multiplication on x86-based many-core processors. In Proc. the 27th ICS, Jun. 2013, pp.273\u2013282.","DOI":"10.1145\/2464996.2465013"},{"key":"1484_CR30","doi-asserted-by":"crossref","unstructured":"Park J, Bikshandi G, Vaidyanathan K, Tang P T P, Dubey P, Kim D. Tera-scale 1D FFT with low-communication algorithm and Intel\u00ae Xeon PhiTM coprocessors. In Proc. SC, Nov. 2013, Article No. 34.","DOI":"10.1145\/2503210.2503242"}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-014-1484-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11390-014-1484-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-014-1484-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T20:18:38Z","timestamp":1747167518000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11390-014-1484-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,11]]},"references-count":30,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2014,11]]}},"alternative-id":["1484"],"URL":"https:\/\/doi.org\/10.1007\/s11390-014-1484-z","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"type":"print","value":"1000-9000"},{"type":"electronic","value":"1860-4749"}],"subject":[],"published":{"date-parts":[[2014,11]]}}}