{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T20:06:50Z","timestamp":1778962010074,"version":"3.51.4"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T00:00:00Z","timestamp":1778889600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T00:00:00Z","timestamp":1778889600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1007\/s10766-026-00820-y","type":"journal-article","created":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T19:14:29Z","timestamp":1778958869000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient High-Performance Computing Strategies for the Legendre Pairs Search"],"prefix":"10.1007","volume":"54","author":[{"given":"Amirhossein","family":"Sojoodi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ilias S.","family":"Kotsireas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ahmad","family":"Afsahi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,5,16]]},"reference":[{"key":"820_CR1","first-page":"75","volume":"23","author":"RJ Fletcher","year":"2001","unstructured":"Fletcher, R.J., Gysin, M., Seberry, J.: Application of the discrete Fourier transform to the search for generalised Legendre pairs and Hadamard matrices. Australas. J. Combin. 23, 75\u201386 (2001)","journal-title":"Australas. J. Combin."},{"issue":"11","key":"820_CR2","doi-asserted-by":"publisher","first-page":"814","DOI":"10.1002\/jcd.21745","volume":"28","author":"KT Arasu","year":"2020","unstructured":"Arasu, K.T., Bulutoglu, D.A., Hollon, J.R.: Legendre $$G$$-array pairs and the theoretical unification of several $$G$$-array families. J. Combin. Des. 28(11), 814\u2013841 (2020). https:\/\/doi.org\/10.1002\/jcd.21745","journal-title":"J. Combin. Des."},{"issue":"12","key":"820_CR3","doi-asserted-by":"publisher","first-page":"870","DOI":"10.1002\/jcd.21806","volume":"29","author":"I Kotsireas","year":"2021","unstructured":"Kotsireas, I., Koutschan, C.: Legendre pairs of lengths $$\\ell \\equiv 0~(mod 3)$$. J. Combin. Des. 29(12), 870\u2013887 (2021)","journal-title":"J. Combin. Des."},{"key":"820_CR4","doi-asserted-by":"publisher","first-page":"20230105","DOI":"10.1515\/spma-2023-0105","volume":"11","author":"IS Kotsireas","year":"2023","unstructured":"Kotsireas, I.S., Koutschan, C., Bulutoglu, D.A., Arquette, D.M., Turner, J.S., Ryan, K.J.: Legendre pairs of lengths $$\\ell \\equiv 0~(mod 5)$$. Spec. Matrices 11, 20230105\u201315 (2023)","journal-title":"Spec. Matrices"},{"issue":"6","key":"820_CR5","doi-asserted-by":"publisher","first-page":"1321","DOI":"10.1007\/s10623-021-00862-y","volume":"89","author":"JS Turner","year":"2021","unstructured":"Turner, J.S., Kotsireas, I.S., Bulutoglu, D.A., Geyer, A.J.: A Legendre pair of length 77 using complementary binary matrices with fixed marginals. Des. Codes Cryptogr. 89(6), 1321\u20131333 (2021)","journal-title":"Des. Codes Cryptogr."},{"key":"820_CR6","doi-asserted-by":"publisher","unstructured":"Apostol, T.M.: Introduction to Analytic Number Theory. In: Undergraduate Texts in Mathematics, p. 338 (1976). https:\/\/doi.org\/10.1007\/978-1-4757-5579-4","DOI":"10.1007\/978-1-4757-5579-4"},{"issue":"2","key":"820_CR7","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1007\/s10623-013-9862-z","volume":"74","author":"DZ Djokovic","year":"2015","unstructured":"Djokovic, D.Z., Kotsireas, I.S.: Compression of periodic complementary sequences and applications. Des. Codes Cryptogr. 74(2), 365\u2013377 (2015). https:\/\/doi.org\/10.1007\/s10623-013-9862-z","journal-title":"Des. Codes Cryptogr."},{"key":"820_CR8","unstructured":"CUDA. https:\/\/docs.nvidia.com\/cuda\/index.htmlhttps:\/\/docs.nvidia.com\/cuda\/index.html Accessed: 2025-10-01 (2025)"},{"key":"820_CR9","unstructured":"OpenMP. https:\/\/www.openmp.org\/ (2024)"},{"key":"820_CR10","unstructured":"MPI Forum. https:\/\/www.mpi-forum.org\/ (2024)"},{"key":"820_CR11","doi-asserted-by":"crossref","unstructured":"Golomb, S.W., Gong, G.: Signal Design for Good Correlation: for Wireless Communication, Cryptography, and Radar, p. 438 (2005)","DOI":"10.1017\/CBO9780511546907"},{"issue":"1","key":"820_CR12","doi-asserted-by":"publisher","DOI":"10.1155\/2014\/560987","volume":"2014","author":"J Colmenares","year":"2014","unstructured":"Colmenares, J., Galizia, A., Ortiz, J., Clematis, A., Rocchia, W.: A combined MPI-CUDA parallel solution of linear and nonlinear Poisson-Boltzmann equation. Biomed. Res. Int. 2014(1), 560987 (2014). https:\/\/doi.org\/10.1155\/2014\/560987","journal-title":"Biomed. Res. Int."},{"issue":"1","key":"820_CR13","doi-asserted-by":"publisher","first-page":"266","DOI":"10.1016\/j.cpc.2010.06.035","volume":"182","author":"C-T Yang","year":"2011","unstructured":"Yang, C.-T., Huang, C.-L., Lin, C.-F.: Hybrid CUDA, OpenMP, and MPI parallel programming on multicore GPU clusters. Comput. Phys. Commun. 182(1), 266\u2013269 (2011). https:\/\/doi.org\/10.1016\/j.cpc.2010.06.035. (Computer Physics Communications Special Edition for Conference on Computational Physics Kaohsiung, Taiwan, Dec 15-19, 2009)","journal-title":"Comput. Phys. Commun."},{"key":"820_CR14","doi-asserted-by":"publisher","first-page":"905","DOI":"10.1093\/GJI\/GGAA042","volume":"221","author":"IE Venetis","year":"2020","unstructured":"Venetis, I.E., Saltogianni, V., Saltogianni, V., Stiros, S.C., Gallopoulos, E.: Multivariable inversion using exhaustive grid search and high-performance GPU processing: a new perspective. Geophys. J. Int. 221, 905\u2013927 (2020). https:\/\/doi.org\/10.1093\/GJI\/GGAA042","journal-title":"Geophys. J. Int."},{"issue":"10","key":"820_CR15","doi-asserted-by":"publisher","first-page":"2021","DOI":"10.3390\/electronics14102021","volume":"14","author":"S Eum","year":"2025","unstructured":"Eum, S., Song, M., Kim, S., Seo, H.: Efficient gpu parallel implementation and optimization of aria for counter and exhaustive key-search modes. Electronics 14(10), 2021 (2025). https:\/\/doi.org\/10.3390\/electronics14102021","journal-title":"Electronics"},{"key":"820_CR16","doi-asserted-by":"publisher","unstructured":"Shreeyansh, V., Gad, A.S., Rao, B.A., Kini, N.G.: Parallelizing exponential search algorithms with mpi and cuda for high-performance computing. In: 2025 International Conference on Next Generation Communication and Information Processing (INCIP), pp. 285\u2013288 (2025). https:\/\/doi.org\/10.1109\/INCIP64058.2025.11019520. IEEE","DOI":"10.1109\/INCIP64058.2025.11019520"},{"key":"820_CR17","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1016\/j.cpc.2016.07.029","volume":"209","author":"V Lon\u010dar","year":"2016","unstructured":"Lon\u010dar, V., Young-S, L.E., \u0160krbi\u0107, S., Muruganandam, P., Adhikari, S.K., Bala\u017e, A.: Openmp, openmp\/mpi, and cuda\/mpi c programs for solving the time-dependent dipolar gross-pitaevskii equation. Comput. Phys. Commun. 209, 190\u2013196 (2016). https:\/\/doi.org\/10.1016\/j.cpc.2016.07.029","journal-title":"Comput. Phys. Commun."},{"key":"820_CR18","unstructured":"MPICH. https:\/\/www.mpich.org\/ (2024)"},{"key":"820_CR19","unstructured":"MVAPICH. https:\/\/mvapich.cse.ohio-state.edu\/ (2024)"},{"key":"820_CR20","unstructured":"Open MPI. https:\/\/www.open-mpi.org\/ (2024)"},{"key":"820_CR21","doi-asserted-by":"publisher","unstructured":"Li, Y., Zhou, B., Zhang, J., Wei, X., Li, Y., Chen, Y.: Radik: Scalable and optimized gpu-parallel radix top-k selection. In: Proceedings of the 38th ACM International Conference on Supercomputing. ICS \u201924, pp. 537\u2013548, New York, NY, USA (2024). https:\/\/doi.org\/10.1145\/3650200.3656596. Association for Computing Machinery","DOI":"10.1145\/3650200.3656596"},{"issue":"15","key":"820_CR22","doi-asserted-by":"publisher","first-page":"4830","DOI":"10.3390\/s24154830","volume":"24","author":"U Iqbal","year":"2024","unstructured":"Iqbal, U., Davies, T., Perez, P.: A review of recent hardware and software advances in gpu-accelerated edge-computing single-board computers (sbcs) for computer vision. Sensors 24(15), 4830 (2024). https:\/\/doi.org\/10.3390\/s24154830","journal-title":"Sensors"},{"key":"820_CR23","doi-asserted-by":"publisher","unstructured":"Sojoodi, A.H., Salimi Beni, M., Khunjush, F.: Ignite-GPU: a GPU-enabled in-memory computing architecture on clusters. J. Supercomput. pp. 1\u201328 (2020). https:\/\/doi.org\/10.1007\/s11227-020-03390-z","DOI":"10.1007\/s11227-020-03390-z"},{"key":"820_CR24","unstructured":"NVIDIA Collective Communications Library. https:\/\/github.com\/NVIDIA\/nccl (2024)"},{"issue":"106","key":"820_CR25","doi-asserted-by":"publisher","first-page":"7484","DOI":"10.21105\/joss.07484","volume":"10","author":"R Prat","year":"2025","unstructured":"Prat, R., Carrard, T., Amarsid, L., Richefeu, V., Doncecchi, C., Lafourcade, P., Latu, G., Vanson, J.-M.: ExaDEM: a HPC application based on exaNBody targeting scalable DEM simulations with complex particle shapes. Open Source Softw. 10(106), 7484 (2025)","journal-title":"Open Source Softw."},{"issue":"11","key":"820_CR26","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3570638","volume":"55","author":"P Hijma","year":"2023","unstructured":"Hijma, P., Heldens, S., Sclocco, A., Van Werkhoven, B., Bal, H.E.: Optimization techniques for GPU programming. ACM Comput. Surv. 55(11), 1\u201381 (2023). https:\/\/doi.org\/10.1145\/3570638","journal-title":"ACM Comput. Surv."},{"key":"820_CR27","unstructured":"Yi, X.: A Study of Performance Programming of CPU, GPU accelerated Computers and SIMD Architecture. pp. 1\u201319 (2024). arXiv:2409.10661"},{"issue":"21","key":"820_CR28","doi-asserted-by":"publisher","first-page":"10377","DOI":"10.3390\/app112110377","volume":"11","author":"H Choi","year":"2021","unstructured":"Choi, H., Lee, J.: Efficient use of gpu memory for large-scale deep learning model training. Appl. Sci. 11(21), 10377 (2021). https:\/\/doi.org\/10.3390\/app112110377","journal-title":"Appl. Sci."},{"key":"820_CR29","doi-asserted-by":"publisher","unstructured":"Berney, K., Sitchinava, N.: Eliminating bank conflicts in gpu mergesort. In: Proceedings of the 37th ACM Symposium on Parallelism in Algorithms and Architectures. SPAA \u201925, pp. 158\u2013170, New York, NY, USA (2025). https:\/\/doi.org\/10.1145\/3694906.3743337. Association for Computing Machinery","DOI":"10.1145\/3694906.3743337"},{"key":"820_CR30","doi-asserted-by":"crossref","unstructured":"Lee, S., Oh, J., Kim, J., Go, S., Park, J., Mahajan, D.: Characterizing Compute-Communication Overlap in GPU-Accelerated Distributed Deep Learning: Performance and Power Implications. (2025). https:\/\/arxiv.org\/abs\/2507.03114","DOI":"10.1109\/ISPASS64960.2025.00041"},{"issue":"2","key":"820_CR31","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/ad51c9","volume":"5","author":"A Srikanth","year":"2024","unstructured":"Srikanth, A., Trigila, C., Roncali, E.: Gpu optimization techniques to accelerate optigan-a particle simulation gan. Mach. Learn.: Sci. Technol. 5(2), 027001 (2024). https:\/\/doi.org\/10.1088\/2632-2153\/ad51c9","journal-title":"Mach. Learn.: Sci. Technol."},{"issue":"3","key":"820_CR32","doi-asserted-by":"publisher","first-page":"551","DOI":"10.1007\/s10766-016-0433-6","volume":"45","author":"AH Khan","year":"2017","unstructured":"Khan, A.H., Al-Mouhamed, M., Al-Mulhem, M., Ahmed, A.F.: Rt-cuda: A software tool for cuda code restructuring. Int. J. Parallel Prog. 45(3), 551\u2013594 (2017). https:\/\/doi.org\/10.1007\/s10766-016-0433-6","journal-title":"Int. J. Parallel Prog."},{"issue":"5","key":"820_CR33","doi-asserted-by":"publisher","first-page":"1048","DOI":"10.3390\/electronics14051048","volume":"14","author":"R Kaur","year":"2025","unstructured":"Kaur, R., Asad, A., Al Abdul Wahid, S., Mohammadi, F.: A survey of advancements in scheduling techniques for efficient deep learning computations on gpus. Electronics 14(5), 1048 (2025). https:\/\/doi.org\/10.3390\/electronics14051048","journal-title":"Electronics"},{"key":"820_CR34","doi-asserted-by":"publisher","unstructured":"Murthy, G., Ravishankar, M., Baskaran, M., Sadayappan, P.: Optimal loop unrolling for gpgpu programs, pp. 1\u201311 (2010). https:\/\/doi.org\/10.1109\/IPDPS.2010.5470423. IEEE","DOI":"10.1109\/IPDPS.2010.5470423"},{"issue":"02","key":"820_CR35","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1109\/MM.2024.3504261","volume":"45","author":"R Matsuo","year":"2025","unstructured":"Matsuo, R., Degawa, Y., Irie, H., Sakai, S., Shioya, R.: Flexible Approximate Computing for Mitigating Branch Divergence in GPUs. IEEE Micro 45(02), 90\u2013100 (2025). https:\/\/doi.org\/10.1109\/MM.2024.3504261","journal-title":"IEEE Micro"},{"key":"820_CR36","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1016\/j.future.2018.08.004","volume":"90","author":"B Werkhoven","year":"2019","unstructured":"Werkhoven, B.: Kernel tuner: A search-optimizing gpu code auto-tuner. Futur. Gener. Comput. Syst. 90, 347\u2013358 (2019). https:\/\/doi.org\/10.1016\/j.future.2018.08.004","journal-title":"Futur. Gener. Comput. Syst."},{"key":"820_CR37","doi-asserted-by":"publisher","unstructured":"Zhao, C., Gao, W., Nie, F., Zhou, H.: A survey of GPU multitasking methods supported by hardware architecture. IEEE Trans. Parallel Distrib. Syst. pp. 1\u201313 (2022). https:\/\/doi.org\/10.1109\/TPDS.2021.3115630","DOI":"10.1109\/TPDS.2021.3115630"},{"issue":"4","key":"820_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3695466","volume":"11","author":"C Cui","year":"2024","unstructured":"Cui, C.: Acceleration of tensor-product operations with tensor cores. ACM Trans. Parallel Comput. 11(4), 1\u201324 (2024)","journal-title":"ACM Trans. Parallel Comput."},{"key":"820_CR39","unstructured":"Sakdhnagool, P., Sabne, A., Eigenmann, R.: Regdem: Increasing GPU performance via shared memory register spilling. (2019). arXiv:abs\/1907.02894"},{"key":"820_CR40","unstructured":"NVIDIA: Advanced NVIDIA CUDA Kernel Optimization Techniques with Handwritten PTX. Accessed: 2025-01-01"},{"key":"820_CR41","unstructured":"Andrews, M., Witteveen, S.: GPU Kernel Scientist: An LLM-Driven Framework for Iterative Kernel Optimization. https:\/\/arxiv.org\/abs\/2506.20807 (2025)"},{"key":"820_CR42","unstructured":"Multi-Process Service.: https:\/\/docs.nvidia.com\/deploy\/mps\/ (2024)"},{"key":"820_CR43","unstructured":"Multi Instance GPU (MIG).: https:\/\/www.nvidia.com\/en-us\/technologies\/multi-instance-gpu\/ (2024)"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-026-00820-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10766-026-00820-y","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-026-00820-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T19:14:33Z","timestamp":1778958873000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10766-026-00820-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,16]]},"references-count":43,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,9]]}},"alternative-id":["820"],"URL":"https:\/\/doi.org\/10.1007\/s10766-026-00820-y","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"value":"0885-7458","type":"print"},{"value":"1573-7640","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5,16]]},"assertion":[{"value":"14 November 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 April 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 May 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"13"}}