{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T12:32:53Z","timestamp":1773318773651,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T00:00:00Z","timestamp":1763164800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000015","name":"DOE U.S. Department of Energy","doi-asserted-by":"publisher","award":["LAB-24-3210"],"award-info":[{"award-number":["LAB-24-3210"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"name":"U.S. National Science Foundation","award":["2234376"],"award-info":[{"award-number":["2234376"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759869","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"152-166","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Automatic Generation of Mappings for Distributed Fourier Operations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7271-8092","authenticated-orcid":false,"given":"Doru Thom","family":"Popovici","sequence":"first","affiliation":[{"name":"Lawrence Berkeley National Laboratory (LBNL), Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1481-6399","authenticated-orcid":false,"given":"Botao","family":"Wu","sequence":"additional","affiliation":[{"name":"The Ohio State University, Columbus, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0608-3690","authenticated-orcid":false,"given":"John","family":"Shalf","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory (LBNL), Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8008-0220","authenticated-orcid":false,"given":"Martin","family":"Kong","sequence":"additional","affiliation":[{"name":"The Ohio State University, Columbus, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"2022. Scalapack Development Repository. https:\/\/github.com\/Reference-ScaLAPACK\/scalapack"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-50371-0_19"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Jehoshua Bruck Ching-Tien Ho Shlomo Kipnis Eli Upfal and Derrick Weathersby. 1997. Efficient algorithms for all-to-all communications in multiport message-passing systems. IEEE Transactions on parallel and distributed systems 8 11 (1997) 1143\u20131156.","DOI":"10.1109\/71.642949"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","unstructured":"Eric\u00a0J. Bylaska. 2017. Chapter Five - Plane-Wave DFT Methods for Chemistry. Annual Reports in Computational Chemistry Vol.\u00a013. Elsevier 185\u2013228. 10.1016\/bs.arcc.2017.06.006","DOI":"10.1016\/bs.arcc.2017.06.006"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","unstructured":"Eric\u00a0J Bylaska Kiril Tsemekhman Scott\u00a0B Baden John\u00a0H Weare and Hannes Jonsson. 2011. Parallel implementation of \u03b3 -point pseudopotential plane-wave DFT with exact exchange. Journal of Computational Chemistry 32 1 (2011) 54\u201369. 10.1002\/jcc.21598","DOI":"10.1002\/jcc.21598"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","unstructured":"A Canning LW Wang A Williamson and A Zunger. 2000. Parallel empirical pseudopotential electronic structure calculations for million atom systems. J. Comput. Phys. 160 1 (2000) 29\u201341. 10.1006\/jcph.2000.6440","DOI":"10.1006\/jcph.2000.6440"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Chen-Chun Chen Kawthar\u00a0Shafie Khorassani Quentin\u00a0G. Anthony Aamir Shafi Hari Subramoni and Dhabaleswar\u00a0K. Panda. 2022. Highly Efficient Alltoall and Alltoallv Communication Algorithms for GPU Systems. https:\/\/mug.mvapich.cse.ohio-state.edu\/static\/media\/mug\/presentations\/22\/wednesday-chen-chun-mug22-short-talk.pdf. Presentation slides Accessed: Jan. 2025.","DOI":"10.1109\/IPDPSW55747.2022.00014"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/FMPC.1992.234898"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","unstructured":"James\u00a0W Cooley and John\u00a0W Tukey. 1965. An algorithm for the machine calculation of complex Fourier series. Mathematics of computation 19 90 (1965) 297\u2013301. 10.1090\/S0025-5718-1965-0178586-1","DOI":"10.1090\/S0025-5718-1965-0178586-1"},{"key":"e_1_3_3_2_11_2","unstructured":"Leonardo de Moura. [n. d.]. Z3 API in Python. https:\/\/ericpony.github.io\/z3py-tutorial\/guide-examples.htm Online; Accessed April 2025."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-78800-3_24"},{"key":"e_1_3_3_2_13_2","unstructured":"Leonardo De\u00a0Moura and Nikolaj Bj\u00f8rner. 2024. Z3 Prover. https:\/\/github.com\/Z3Prover\/z3"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","unstructured":"Franz Franchetti Tze\u00a0Meng Low Doru\u00a0Thom Popovici Richard\u00a0M. Veras Daniele\u00a0G. Spampinato Jeremy\u00a0R. Johnson Markus P\u00fcschel James\u00a0C. Hoe and Jos\u00e9 M.\u00a0F. Moura. 2018. SPIRAL: Extreme Performance Portability. Proc. IEEE 106 11 (2018) 1935\u20131968. 10.1109\/JPROC.2018.2873289","DOI":"10.1109\/JPROC.2018.2873289"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2006.31"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","unstructured":"M. Frigo and S.G. Johnson. 2005. The Design and Implementation of FFTW3. Proc. IEEE 93 2 (2005) 216\u2013231. 10.1109\/JPROC.2004.840301","DOI":"10.1109\/JPROC.2004.840301"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/1464291.1464352"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","unstructured":"Amir Gholami Judith Hill Dhairya Malhotra and George Biros. 2015. AccFFT: A library for distributed-memory FFT on CPU and GPU architectures. (2015). 10.48550\/arXiv.1506.07933","DOI":"10.48550\/arXiv.1506.07933"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","unstructured":"J\u00fcrgen Hafner. 2008. Ab-initio simulations of materials using VASP: Density-functional theory and beyond. Journal of computational chemistry 29 13 (2008) 2044\u20132078. 10.1002\/jcc.21057","DOI":"10.1002\/jcc.21057"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","unstructured":"M\u00e1rcia\u00a0A Inda and Rob\u00a0H Bisseling. 2001. A simple and efficient parallel FFT algorithm using the BSP model. Parallel Comput. 27 14 (2001) 1847\u20131878. 10.1016\/S0167-8191(01)00118-1","DOI":"10.1016\/S0167-8191(01)00118-1"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","unstructured":"Weile Jia Zongyan Cao Long Wang Jiyun Fu Xuebin Chi Weiguo Gao and Lin-Wang Wang. 2013. The analysis of a plane wave pseudopotential density functional theory code on a GPU machine. Computer Physics Communications 184 1 (2013) 9\u201318. 10.1016\/j.cpc.2012.08.002","DOI":"10.1016\/j.cpc.2012.08.002"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","unstructured":"Jaewoon Jung Chigusa Kobayashi Toshiyuki Imamura and Yuji Sugita. 2016. Parallel implementation of 3D FFT with volumetric decomposition schemes for efficient molecular dynamics simulations. Computer Physics Communications 200 (2016) 57\u201365. 10.1016\/j.cpc.2015.10.024","DOI":"10.1016\/j.cpc.2015.10.024"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","unstructured":"Ricky\u00a0A Kendall Edoardo Apr\u00e0 David\u00a0E Bernholdt Eric\u00a0J Bylaska Michel Dupuis George\u00a0I Fann Robert\u00a0J Harrison Jialin Ju Jeffrey\u00a0A Nichols Jarek Nieplocha et\u00a0al. 2000. High performance computational chemistry: An overview of NWChem a distributed parallel application. Computer Physics Communications 128 1-2 (2000) 260\u2013283. 10.1016\/S0010-4655(00)00065-5","DOI":"10.1016\/S0010-4655(00)00065-5"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607096"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","unstructured":"Thomas Koopman and Rob\u00a0H Bisseling. 2023. Minimizing communication in the multidimensional FFT. SIAM Journal on Scientific Computing 45 6 (2023) C330\u2013C347. 10.1137\/22M1487242","DOI":"10.1137\/22M1487242"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","unstructured":"Karol Kowalski Raymond Bair Nicholas\u00a0P Bauman Jeffery\u00a0S Boschen Eric\u00a0J Bylaska Jeff Daily Wibe\u00a0A de Jong Thom Dunning\u00a0Jr Niranjan Govind Robert\u00a0J Harrison et\u00a0al. 2021. From NWChem to NWChemEx: Evolving with the computational chemistry landscape. Chemical reviews 121 8 (2021) 4962\u20134998. 10.1021\/acs.chemrev.0c00998","DOI":"10.1021\/acs.chemrev.0c00998"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356181"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","unstructured":"Ricardo\u00a0A Lebensohn. 2001. N-site modeling of a 3D viscoplastic polycrystal using fast Fourier transform. Acta materialia 49 14 (2001) 2723\u20132737. 10.1016\/S1359-6454(01)00172-0","DOI":"10.1016\/S1359-6454(01)00172-0"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","unstructured":"Ricardo\u00a0A Lebensohn Anand\u00a0K Kanjarla and Philip Eisenlohr. 2012. An elasto-viscoplastic formulation based on fast Fourier transforms for the prediction of micromechanical fields in polycrystalline materials. International Journal of Plasticity 32 (2012) 59\u201369. 10.1016\/j.ijplas.2011.12.005","DOI":"10.1016\/j.ijplas.2011.12.005"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","unstructured":"S-B Lee RA Lebensohn and Anthony\u00a0D Rollett. 2011. Modeling the viscoplastic micromechanical response of two-phase materials using Fast Fourier Transforms. International Journal of Plasticity 27 5 (2011) 707\u2013727. 10.1016\/j.ijplas.2010.09.002","DOI":"10.1016\/j.ijplas.2010.09.002"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","unstructured":"Zongyi Li Nikola Kovachki Kamyar Azizzadenesheli Burigede Liu Kaushik Bhattacharya Andrew Stuart and Anima Anandkumar. 2020. Fourier neural operator for parametric partial differential equations. arXiv (2020). 10.48550\/arXiv.2010.08895","DOI":"10.48550\/arXiv.2010.08895"},{"key":"e_1_3_3_2_32_2","unstructured":"NVIDIA Corporation. 2025. NVIDIA CUDA cuBLAS Library. https:\/\/developer.nvidia.com\/cublas Online; Accessed March 2025."},{"key":"e_1_3_3_2_33_2","unstructured":"NVIDIA Corporation. 2025. NVIDIA CUDA cuFFT Library. https:\/\/developer.nvidia.com\/cufft Online; Accessed March 2025."},{"key":"e_1_3_3_2_34_2","unstructured":"Ohio Supercomputer Center. 1987. Ohio Supercomputer Center. http:\/\/osc.edu\/ark:\/19495\/f5s1ph73"},{"key":"e_1_3_3_2_35_2","unstructured":"Ohio Supercomputer Center. 2022. Ascend Supercomputer. http:\/\/osc.edu\/ark:\/19495\/hpc3ww9d"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","unstructured":"Dmitry Pekurovsky. 2012. P3DFFT: A Framework for Parallel Computations of Fourier Transforms in Three Dimensions. SIAM Journal on Scientific Computing 34 4 (2012) C192\u2013C209. 10.1137\/11082748X","DOI":"10.1137\/11082748X"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.90.014101"},{"key":"e_1_3_3_2_38_2","volume-title":"FFTs for (mostly) Particle Codes within the DOE Exascale Computing Project.","author":"Plimpton Steven\u00a0J","year":"2017","unstructured":"Steven\u00a0J Plimpton. 2017. FFTs for (mostly) Particle Codes within the DOE Exascale Computing Project. Technical Report. Sandia National Lab.(SNL-NM), Albuquerque, NM (United States)."},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","unstructured":"Steven\u00a0J Plimpton and Aidan\u00a0P Thompson. 2012. Computational aspects of many-body potentials. MRS bulletin 37 5 (2012) 513\u2013521. 10.1557\/mrs.2012.96","DOI":"10.1557\/mrs.2012.96"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","unstructured":"Doru Popovici Botao Wu John Shalf and Martin Kong. 2025. Automatic Generation of Mappings for Distributed Fourier Operations - Artifact SC25. 10.5281\/zenodo.16740635Online; Accessed August 2025..","DOI":"10.5281\/zenodo.16740635"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460354"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","unstructured":"Doru\u00a0Thom Popovici Martin\u00a0D Schatz Franz Franchetti and Tze\u00a0Meng Low. 2020. A flexible framework for multidimensional DFTs. SIAM Journal on Scientific Computing 42 5 (2020) C245\u2013C264. 10.1137\/19M1288401","DOI":"10.1137\/19M1288401"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","unstructured":"Jack Poulson Bryan Marker Robert\u00a0A Van\u00a0de Geijn Jeff\u00a0R Hammond and Nichols\u00a0A Romero. 2013. Elemental: A new framework for distributed memory dense matrix computations. ACM Transactions on Mathematical Software (TOMS) 39 2 (2013) 13. 10.1145\/2427023.2427030","DOI":"10.1145\/2427023.2427030"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","unstructured":"Martin\u00a0D Schatz Robert\u00a0A Van\u00a0de Geijn and Jack Poulson. 2016. Parallel matrix multiplication: A systematic journey. SIAM Journal on Scientific Computing 38 6 (2016) C748\u2013C781. 10.1137\/140993478","DOI":"10.1137\/140993478"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/2581122.2544155"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-444-53835-2.00007-9"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","unstructured":"Paul\u00a0N Swarztrauber. 1984. FFT algorithms for vector computers. Parallel Comput. 1 1 (1984) 45\u201363. 10.1016\/S0167-8191(84)90413-7","DOI":"10.1016\/S0167-8191(84)90413-7"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","unstructured":"Daisuke Takahashi. 2003. A parallel 1-D FFT algorithm for the Hitachi SR8000. Parallel Comput. 29 6 (2003) 679\u2013690. 10.1016\/S0167-8191(03)00039-5","DOI":"10.1016\/S0167-8191(03)00039-5"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14390-863"},{"key":"e_1_3_3_2_50_2","unstructured":"Doru Thom\u00a0Popovici Mauro del Ben Osni Marques and Andrew Canning. 2024. Flexible Multi-Dimensional FFTs for Plane Wave Density Functional Theory Codes. arXiv e-prints (2024) arXiv\u20132406. https:\/\/arxiv.org\/abs\/2406.05577"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","unstructured":"Marat Valiev Eric\u00a0J Bylaska Niranjan Govind Karol Kowalski Tjerk\u00a0P Straatsma Hubertus\u00a0JJ Van\u00a0Dam Dunyou Wang Jarek Nieplocha Edoardo Apra Theresa\u00a0L Windus et\u00a0al. 2010. NWChem: a comprehensive and scalable open-source solution for large scale molecular simulations. Computer Physics Communications 181 9 (2010) 1477\u20131489. 10.1016\/j.cpc.2010.04.018","DOI":"10.1016\/j.cpc.2010.04.018"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0A Van De\u00a0Geijn and Jerrell Watts. 1997. SUMMA: Scalable universal matrix multiplication algorithm. Concurrency: Practice and Experience 9 4 (1997) 255\u2013274. https:\/\/onlinelibrary.wiley.com\/doi\/pdf\/10.1002\/(SICI)1096-9128(199704)9:4%3C255::AID-CPE250%3E3.0.CO;2-2","DOI":"10.1002\/(SICI)1096-9128(199704)9:4<255::AID-CPE250>3.0.CO;2-2"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","unstructured":"J-L Vay A Almgren J Bell L Ge DP Grote M Hogan O Kononenko R Lehe A Myers C Ng et\u00a0al. 2018. Warp-X: A new exascale computing platform for beam\u2013plasma simulations. Nuclear Instruments and Methods in Physics Research Section A: Accelerators Spectrometers Detectors and Associated Equipment (2018). 10.1016\/j.nima.2018.01.035","DOI":"10.1016\/j.nima.2018.01.035"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","unstructured":"Jean-Luc Vay Irving Haber and Brendan\u00a0B Godfrey. 2013. A domain decomposition method for pseudo-spectral electromagnetic simulations of plasmas. J. Comput. Phys. 243 (2013) 260\u2013268. 10.1016\/j.jcp.2013.03.010","DOI":"10.1016\/j.jcp.2013.03.010"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"crossref","unstructured":"Gege Wen Zongyi Li Kamyar Azizzadenesheli Anima Anandkumar and Sally\u00a0M Benson. 2022. U-FNO\u2014An enhanced Fourier neural operator-based deep-learning model for multiphase flow. Advances in Water Resources 163 (2022) 104180. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0309170822000562","DOI":"10.1016\/j.advwatres.2022.104180"}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3712285.3759869","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759869","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759869","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:51:10Z","timestamp":1773255070000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759869"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":54,"alternative-id":["10.1145\/3712285.3759869","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759869","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}