{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T22:23:00Z","timestamp":1770070980242,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2012,6,25]],"date-time":"2012-06-25T00:00:00Z","timestamp":1340582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2012,6,25]]},"DOI":"10.1145\/2304576.2304604","type":"proceedings-article","created":{"date-parts":[[2012,6,27]],"date-time":"2012-06-27T13:31:21Z","timestamp":1340803881000},"page":"205-214","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":40,"title":["On the communication complexity of 3D FFTs and its implications for Exascale"],"prefix":"10.1145","author":[{"given":"Kenneth","family":"Czechowski","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Casey","family":"Battaglino","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Chris","family":"McClanahan","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Kartik","family":"Iyer","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"P.-K.","family":"Yeung","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]},{"given":"Richard","family":"Vuduc","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, GA, USA"}]}],"member":"320","published-online":{"date-parts":[[2012,6,25]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"The HPC Challenge benchmark. http:\/\/icl.cs.utk.edu\/hpcc.  The HPC Challenge benchmark. http:\/\/icl.cs.utk.edu\/hpcc."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/SHPCC.1994.296635"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2002.993003"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1965724.1965747"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.5555\/1898953.1899016"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/197912.197923"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2006.65"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/0743-7315(88)90002-0"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/1791889.1791927"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the 4th Annual Linux Showcase & Conference","author":"Cramer C. E.","year":"2000","unstructured":"C. E. Cramer and J. Board . The development and integration of a distributed 3D FFT for a cluster of workstations . In Proceedings of the 4th Annual Linux Showcase & Conference , Atlanta, GA, USA , 2000 . C. E. Cramer and J. Board. The development and integration of a distributed 3D FFT for a cluster of workstations. In Proceedings of the 4th Annual Linux Showcase & Conference, Atlanta, GA, USA, 2000."},{"key":"e_1_3_2_1_11_1","first-page":"1","volume-title":"USENIX Wkshp. Hot Topics in Parallelism (HotPar)","author":"Czechowski K.","year":"2011","unstructured":"K. Czechowski , C. Battaglino , C. Mcclanahan , A. Chandramowlishwaran , and R. Vuduc . Balance principles for algorithm-architecture co-design . In USENIX Wkshp. Hot Topics in Parallelism (HotPar) , pages 1 -- 5 , Berkeley, CA , USA, 2011 . Usenix Association . K. Czechowski, C. Battaglino, C. Mcclanahan, A. Chandramowlishwaran, and R. Vuduc. Balance principles for algorithm-architecture co-design. In USENIX Wkshp. Hot Topics in Parallelism (HotPar), pages 1--5, Berkeley, CA, USA, 2011. Usenix Association."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-69330-7_14"},{"key":"e_1_3_2_1_13_1","first-page":"70","volume-title":"Proceedings of 7th SIAM Conference on Parallel Processing","author":"Ding H. Q.","year":"1995","unstructured":"H. Q. Ding , R. D. Ferraro , and D. B. Gennery . A Portable 3D FFT Package for Distributed-Memory Parallel Architectures . In Proceedings of 7th SIAM Conference on Parallel Processing , pages 70 -- 71 . SIAM Press , 1995 . H. Q. Ding, R. D. Ferraro, and D. B. Gennery. A Portable 3D FFT Package for Distributed-Memory Parallel Architectures. In Proceedings of 7th SIAM Conference on Parallel Processing, pages 70--71. SIAM Press, 1995."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-8191(01)00120-X"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.38"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342010391989"},{"key":"e_1_3_2_1_17_1","volume-title":"Turbulence simulations on o(104) processors","author":"Donzis D.","year":"2008","unstructured":"D. Donzis , P. Yeung , and D. Pekurovsky . Turbulence simulations on o(104) processors . 2008 . D. Donzis, P. Yeung, and D. Pekurovsky. Turbulence simulations on o(104) processors. 2008."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1137\/S1064827597316266"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.492.0457"},{"issue":"8","key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","first-page":"531","DOI":"10.1016\/j.cpc.2006.12.006","article-title":"Performance of the 3D FFT on the 6D network torus QCDOC parallel supercomputer","volume":"176","author":"B.","year":"2007","unstructured":"B. FANG, Y. DENG, and G. MARTYNA . Performance of the 3D FFT on the 6D network torus QCDOC parallel supercomputer . Computer Physics Communications , 176 ( 8 ): 531 -- 538 , Apr. 2007 . B. FANG, Y. DENG, and G. MARTYNA. Performance of the 3D FFT on the 6D network torus QCDOC parallel supercomputer. Computer Physics Communications, 176(8):531--538, Apr. 2007.","journal-title":"Computer Physics Communications"},{"key":"e_1_3_2_1_21_1","first-page":"285","volume-title":"Proceedings of the 40th Annual Symposium on Foundations of Computer Science, FOCS '99","author":"Frigo M.","unstructured":"M. Frigo , C. E. Leiserson , H. Prokop , and S. Ramachandran . Cache-oblivious algorithms . In Proceedings of the 40th Annual Symposium on Foundations of Computer Science, FOCS '99 , pages 285 --, Washington, DC, USA, 1999. IEEE Computer Society. M. Frigo, C. E. Leiserson, H. Prokop, and S. Ramachandran. Cache-oblivious algorithms. In Proceedings of the 40th Annual Symposium on Foundations of Computer Science, FOCS '99, pages 285--, Washington, DC, USA, 1999. IEEE Computer Society."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470417"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1839676.1839694"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1177\/109434200101500104"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1810085.1810127"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2009.4"},{"key":"e_1_3_2_1_27_1","first-page":"1","volume-title":"Proceedings of the Cray User's Group (CUG) Meeting","author":"Hein J.","year":"2008","unstructured":"J. Hein , H. Jagode , U. Sigrist , A. Simpson , and A. Trew . Parallel 3D-FFTs for multi-core nodes on a mesh communication network . In Proceedings of the Cray User's Group (CUG) Meeting , pages 1 -- 15 , Helsinki, Finland , 2008 . J. Hein, H. Jagode, U. Sigrist, A. Simpson, and A. Trew. Parallel 3D-FFTs for multi-core nodes on a mesh communication network. In Proceedings of the Cray User's Group (CUG) Meeting, pages 1--15, Helsinki, Finland, 2008."},{"key":"e_1_3_2_1_28_1","volume-title":"Task placement of parallel multidimensional ffts on a mesh communication network","author":"Jagode H.","year":"2008","unstructured":"H. Jagode , J. Hein , and A. Trew . Task placement of parallel multidimensional ffts on a mesh communication network . University of Tennessee Knoxville , Technical Report No. ut-cs-08--613, 2008 . H. Jagode, J. Hein, and A. Trew. Task placement of parallel multidimensional ffts on a mesh communication network. University of Tennessee Knoxville, Technical Report No. ut-cs-08--613, 2008."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/800076.802486"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063421"},{"key":"e_1_3_2_1_31_1","volume-title":"Sept.","author":"Kogge P.","year":"2008","unstructured":"P. Kogge Exascale Computing Study: Technology challenges in acheiving exascale systems , Sept. 2008 . P. Kogge et al. Exascale Computing Study: Technology challenges in acheiving exascale systems, Sept. 2008."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2008.83"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/17407.17362"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2008.15"},{"key":"e_1_3_2_1_35_1","volume-title":"Dec.","author":"McCalpin J.","year":"1995","unstructured":"J. McCalpin . Memory Bandwidth and Machine Balance in High Performance Computers. IEEE Technical Committee on Computer Architecture (TCCA) Newsletter , Dec. 1995 . J. McCalpin. Memory Bandwidth and Machine Balance in High Performance Computers. IEEE Technical Committee on Computer Architecture (TCCA) Newsletter, Dec. 1995."},{"key":"e_1_3_2_1_36_1","unstructured":"D. Pekurovsky and J. H. Goebbert. P3DFFT -- highly scalable parallel 3d fast fourier transforms library. http:\/\/www.sdsc.edu\/us\/resources\/p3dfft November 2010.  D. Pekurovsky and J. H. Goebbert. P3DFFT -- highly scalable parallel 3d fast fourier transforms library. http:\/\/www.sdsc.edu\/us\/resources\/p3dfft November 2010."},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the International Workshop on Performance Modeling, Benchmarking and Simulation (PMBS)","author":"Pennycook S. J.","year":"2010","unstructured":"S. J. Pennycook , S. D. Hammond , S. A. Jarvis , and G. R. Mudalige . Performance analysis of a hybrid MPI\/CUDA implementation of the NAS-LU benchmark . In Proceedings of the International Workshop on Performance Modeling, Benchmarking and Simulation (PMBS) , New Orleans, LA, USA , Nov. 2010 . S. J. Pennycook, S. D. Hammond, S. A. Jarvis, and G. R. Mudalige. Performance analysis of a hybrid MPI\/CUDA implementation of the NAS-LU benchmark. In Proceedings of the International Workshop on Performance Modeling, Benchmarking and Simulation (PMBS), New Orleans, LA, USA, Nov. 2010."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/1816038.1816002"},{"key":"e_1_3_2_1_39_1","volume-title":"The University of Edinburgh","author":"Sigrist U.","year":"2007","unstructured":"U. Sigrist . Optimizing parallel 3D fast Fourier transformations for a cluster of IBM POWER5 SMP nodes. PhD thesis , The University of Edinburgh , 2007 . U. Sigrist. Optimizing parallel 3D fast Fourier transformations for a cluster of IBM POWER5 SMP nodes. PhD thesis, The University of Edinburgh, 2007."},{"key":"e_1_3_2_1_40_1","first-page":"316","volume-title":"Proceedings of Applied Parallel Computing: New Paradigms for HPC in Industry and","author":"Takahashi D.","year":"2001","unstructured":"D. Takahashi . A Parallel 3-D FFT Algorithm on Clusters of Vector SM Ps . In Proceedings of Applied Parallel Computing: New Paradigms for HPC in Industry and Academia , volume LNCS 1947 , pages 316 -- 323 , 2001 . D. Takahashi. A Parallel 3-D FFT Algorithm on Clusters of Vector SMPs. In Proceedings of Applied Parallel Computing: New Paradigms for HPC in Industry and Academia, volume LNCS 1947, pages 316--323, 2001."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2011.83"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00450-011-0171-3"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654083"}],"event":{"name":"ICS'12: International Conference on Supercomputing","location":"San Servolo Island, Venice Italy","acronym":"ICS'12","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 26th ACM international conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2304576.2304604","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2304576.2304604","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T08:48:47Z","timestamp":1750236527000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2304576.2304604"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,6,25]]},"references-count":43,"alternative-id":["10.1145\/2304576.2304604","10.1145\/2304576"],"URL":"https:\/\/doi.org\/10.1145\/2304576.2304604","relation":{},"subject":[],"published":{"date-parts":[[2012,6,25]]},"assertion":[{"value":"2012-06-25","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}