{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:48:09Z","timestamp":1742914089469,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":21,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819615414"},{"type":"electronic","value":"9789819615421"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-1542-1_12","type":"book-chapter","created":{"date-parts":[[2025,2,14]],"date-time":"2025-02-14T17:17:54Z","timestamp":1739553474000},"page":"204-218","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Parallel Implementation of\u00a0Number-Theoretic Transform on\u00a0GPU Clusters"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1357-5770","authenticated-orcid":false,"given":"Daisuke","family":"Takahashi","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,15]]},"reference":[{"key":"12_CR1","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1007\/BF00162341","volume":"4","author":"DH Bailey","year":"1990","unstructured":"Bailey, D.H.: FFTs in external or hierarchical memory. J. Supercomput. 4, 23\u201335 (1990)","journal-title":"J. Supercomput."},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Boemer, F., Kim, S., Seifu, G., de\u00a0Souza, F.D.M., Gopal, V.: Intel HEXL: accelerating homomorphic encryption with Intel AVX512-IFMA52. In: Proceedings of 9th Workshop on Encrypted Computing & Applied Homomorphic Cryptography (WAHC 2021), pp. 57\u201362 (2021)","DOI":"10.1145\/3474366.3486926"},{"key":"12_CR3","unstructured":"Boemer, F., et\u00a0al.: Intel HEXL. https:\/\/github.com\/intel\/hexl"},{"key":"12_CR4","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1109\/TAU.1967.1161899","volume":"15","author":"WT Cochran","year":"1967","unstructured":"Cochran, W.T., et al.: What is the fast Fourier transform? IEEE Trans. Audio Electroacoust. 15, 45\u201355 (1967)","journal-title":"IEEE Trans. Audio Electroacoust."},{"key":"12_CR5","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1090\/S0025-5718-1965-0178586-1","volume":"19","author":"JW Cooley","year":"1965","unstructured":"Cooley, J.W., Tukey, J.W.: An algorithm for the machine calculation of complex Fourier series. Math. Comput. 19, 297\u2013301 (1965)","journal-title":"Math. Comput."},{"key":"12_CR6","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1016\/j.jsc.2013.09.002","volume":"60","author":"D Harvey","year":"2014","unstructured":"Harvey, D.: Faster arithmetic for number-theoretic transforms. J. Symb. Comput. 60, 113\u2013119 (2014)","journal-title":"J. Symb. Comput."},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"van\u00a0der Hoeven, J., Lecerf, G., Quintin, G.: Modular SIMD arithmetic in Mathemagix. ACM Trans. Math. Softw. 43 (2016). Article 5","DOI":"10.1145\/2876503"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Jesus, R., e\u00a0Silva, T.O., Weiland, M.: Vectorizing and distributing number-theoretic transform to count Goldbach partitions on Arm-based supercomputers. Concurr. Comput. Pract. Exp. 35, e7882 (2023)","DOI":"10.1002\/cpe.7882"},{"key":"12_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1007\/978-3-319-02297-0_21","volume-title":"Computer Algebra in Scientific Computing","author":"L Meng","year":"2013","unstructured":"Meng, L., Johnson, J.: Automatic parallel library generation for general-size modular FFT algorithms. In: Gerdt, V.P., Koepf, W., Mayr, E.W., Vorozhtsov, E.V. (eds.) CASC 2013. LNCS, vol. 8136, pp. 243\u2013256. Springer, Cham (2013). https:\/\/doi.org\/10.1007\/978-3-319-02297-0_21"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Meng, L., Johnson, J.R., Franchetti, F., Voronenko, Y., Maza, M.M., Xie, Y.: Spiral-generated modular FFT algorithms. In: Proceedings of 4th International Workshop on Parallel and Symbolic Computation (PASCO 2010), pp. 169\u2013170 (2010)","DOI":"10.1145\/1837210.1837235"},{"key":"12_CR11","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1090\/S0025-5718-1985-0777282-X","volume":"44","author":"PL Montgomery","year":"1985","unstructured":"Montgomery, P.L.: Modular multiplication without trial division. Math. Comput. 44, 519\u2013521 (1985)","journal-title":"Math. Comput."},{"key":"12_CR12","unstructured":"NVIDIA: GPUDirect RDMA, Release 12.5 (2024). https:\/\/docs.nvidia.com\/cuda\/pdf\/GPUDirect_RDMA.pdf"},{"key":"12_CR13","unstructured":"\u00d6zcan, A.\u015e., Sava\u015f, E.: Two algorithms for fast GPU implementation of NTT. Cryptology ePrint Archive, Paper 2023\/1410 (2023). https:\/\/eprint.iacr.org\/2023\/1410"},{"key":"12_CR14","doi-asserted-by":"publisher","first-page":"2840","DOI":"10.1007\/s11227-021-03980-5","volume":"78","author":"\u00d6 \u00d6zerk","year":"2022","unstructured":"\u00d6zerk, \u00d6., Elgezen, C., Mert, A.C., \u00d6zt\u00fcrk, E.: Efficient number theoretic transform implementation on GPU for homomorphic encryption. J. Supercomput. 78, 2840\u20132872 (2022)","journal-title":"J. Supercomput."},{"key":"12_CR15","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1090\/S0025-5718-1971-0301966-0","volume":"25","author":"JM Pollard","year":"1971","unstructured":"Pollard, J.M.: The fast Fourier transform in a finite field. Math. Comput. 25, 365\u2013374 (1971)","journal-title":"Math. Comput."},{"key":"12_CR16","unstructured":"Shoup, V.: NTL: a library for doing number theory. https:\/\/libntl.org"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Suresh, K.K., et\u00a0al.: A novel framework for efficient offloading of communication operations to Bluefield SmartNICs. In: Proceedings of 2023 IEEE International Parallel and Distributed Processing Symposium (IPDPS 2023), pp. 123\u2013133 (2023)","DOI":"10.1109\/IPDPS54959.2023.00022"},{"key":"12_CR18","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1016\/S0167-8191(84)90413-7","volume":"1","author":"PN Swarztrauber","year":"1984","unstructured":"Swarztrauber, P.N.: FFT algorithms for vector computers. Parallel Comput. 1, 45\u201363 (1984)","journal-title":"Parallel Comput."},{"key":"12_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"655","DOI":"10.1007\/978-3-030-58814-4_52","volume-title":"Computational Science and Its Applications \u2013 ICCSA 2020","author":"D Takahashi","year":"2020","unstructured":"Takahashi, D.: Fast multiple montgomery multiplications using intel AVX-512IFMA instructions. In: Gervasi, O., et al. (eds.) ICCSA 2020. LNCS, vol. 12253, pp. 655\u2013663. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58814-4_52"},{"key":"12_CR20","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1007\/978-3-031-14788-3_18","volume-title":"CASC 2022","author":"D Takahashi","year":"2022","unstructured":"Takahashi, D.: An implementation of parallel number-theoretic transform using Intel AVX-512 instructions. In: Boulier, F., et al. (eds.) CASC 2022. LNCS, vol. 13366, pp. 318\u2013332. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-14788-3_18"},{"key":"12_CR21","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611970999","volume-title":"Computational Frameworks for the Fast Fourier Transform","author":"C Van Loan","year":"1992","unstructured":"Van Loan, C.: Computational Frameworks for the Fast Fourier Transform. SIAM Press, Philadelphia, PA (1992)"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-1542-1_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,14]],"date-time":"2025-02-14T17:18:01Z","timestamp":1739553481000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-1542-1_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819615414","9789819615421"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-1542-1_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"15 February 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Macau","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ica3pp2024.scimeeting.cn\/en\/web\/index\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}