{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T05:09:05Z","timestamp":1745989745544,"version":"3.37.3"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2022,12,27]],"date-time":"2022-12-27T00:00:00Z","timestamp":1672099200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,27]],"date-time":"2022-12-27T00:00:00Z","timestamp":1672099200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100002322","name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004263","name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado do Rio Grande do Sul","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004263","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007337","name":"Universidade Federal de Santa Maria","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100007337","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2023,5]]},"DOI":"10.1007\/s11227-022-04932-3","type":"journal-article","created":{"date-parts":[[2022,12,27]],"date-time":"2022-12-27T04:20:57Z","timestamp":1672114857000},"page":"8890-8911","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["NAS Parallel Benchmarks with Python: a performance and programming effort analysis focusing on GPUs"],"prefix":"10.1007","volume":"79","author":[{"given":"Daniel","family":"Di Domenico","sequence":"first","affiliation":[]},{"given":"Jo\u00e3o V. F.","family":"Lima","sequence":"additional","affiliation":[]},{"given":"Gerson G. H.","family":"Cavalheiro","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,12,27]]},"reference":[{"key":"4932_CR1","unstructured":"CUDA C++ Programming Guide: Version 11.2.1. Nvidia (2021)"},{"key":"4932_CR2","unstructured":"The OpenCL Spec.: Version 2.2. Khronos Working Group (2019)"},{"key":"4932_CR3","unstructured":"OpenACC Specification: Version 3.1. OpenACC.org (2020)"},{"key":"4932_CR4","unstructured":"SYCL 2020 Reference Guide: Revision 2. Khronos Working Group (2022)"},{"key":"4932_CR5","doi-asserted-by":"publisher","unstructured":"Holm HH, Brodtkorb AR, S\u00e6tra ML (2020) GPU computing with Python: performance, energy efficiency and usability. Computation 8(1). https:\/\/doi.org\/10.3390\/computation8010004","DOI":"10.3390\/computation8010004"},{"key":"4932_CR6","doi-asserted-by":"publisher","unstructured":"Ziogas AN, Ben-Nun T, Schneider T, Hoefler T (2021) NPBench: abenchmarking suite for high-performance NumPy. In: Proceedings of the ACM international conference on supercomputing. ICS\u201921. ACM, New York, NY, USA, pp 63\u201374 https:\/\/doi.org\/10.1145\/3447818.3460360","DOI":"10.1145\/3447818.3460360"},{"key":"4932_CR7","doi-asserted-by":"publisher","unstructured":"Oden L (2020) Lessons learned from comparing C-CUDA and Python-Numba for GPU-Computing. In: 2020 28th Euromicro international conference on parallel, distributed and network-based processing (PDP), pp 216\u2013223 https:\/\/doi.org\/10.1109\/PDP50117.2020.00041","DOI":"10.1109\/PDP50117.2020.00041"},{"key":"4932_CR8","unstructured":"Numba Documentation: Version 0.50. Anaconda, Inc. and others (2021)"},{"key":"4932_CR9","unstructured":"CuPy API Reference: Version 11.4. Preferred Infrastructure, Inc. and Preferred Networks, Inc. (2021)"},{"issue":"3","key":"4932_CR10","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1016\/j.parco.2011.09.001","volume":"38","author":"A Kl\u00f6ckner","year":"2012","unstructured":"Kl\u00f6ckner A, Pinto N, Lee Y, Catanzaro B, Ivanov P, Fasih A (2012) PyCUDA and PyOpenCL: ascripting-based approach to GPU run-time code generation. Parallel Comput 38(3):157\u2013174. https:\/\/doi.org\/10.1016\/j.parco.2011.09.001","journal-title":"Parallel Comput"},{"key":"4932_CR11","unstructured":"Bailey DH, Barszcz E, Barton JT, Browning DS, Carter RL, Fatoohi RA, Frederickson PO, Lasinski TA, Simon HD, Venkatakrishnan V, Weeratunga SK (1994) The NAS Parallel Benchmarks RNR-94-007. Technical report, NASA Advanced Supercomputing Division"},{"key":"4932_CR12","doi-asserted-by":"publisher","unstructured":"Di\u00a0Domenico D, Cavalheiro GGH, Lima JVF (2022) Nas parallel benchmark kernels with python: a performance and programming effort analysis focusing on gpus. In: 2022 30th Euromicro international conference on parallel, distributed and network-based processing (PDP), pp 26\u201333. https:\/\/doi.org\/10.1109\/PDP55904.2022.00013","DOI":"10.1109\/PDP55904.2022.00013"},{"key":"4932_CR13","doi-asserted-by":"publisher","unstructured":"Araujo GAd, Griebler D, Danelutto M, Fernandes LG (2020) Efficient NAS Parallel Benchmark Kernels with CUDA. In: 2020 28th Euromicro international conference on parallel, distributed and network-based processing (PDP), pp 9\u201316. https:\/\/doi.org\/10.1109\/PDP50117.2020.00009","DOI":"10.1109\/PDP50117.2020.00009"},{"key":"4932_CR14","doi-asserted-by":"publisher","unstructured":"Araujo G, Griebler D, Rockenbach DA, Danelutto M, Fernandes LG (2021) NAS Parallel Benchmarks with CUDA and beyond. Software: Practice and Experience, 1\u201328. https:\/\/doi.org\/10.1002\/spe.3056","DOI":"10.1002\/spe.3056"},{"key":"4932_CR15","first-page":"67","volume-title":"Lang Compil Parallel Comput","author":"R Xu","year":"2015","unstructured":"Xu R, Tian X, Chandrasekaran S, Yan Y, Chapman B (2015) NAS Parallel Benchmarks for GPGPUs using a directive-based programming model. In: Brodman J, Tu P (eds) Lang Compil Parallel Comput. Springer, Cham, pp 67\u201381"},{"key":"4932_CR16","doi-asserted-by":"crossref","unstructured":"Behnel S, Bradshaw RW, Seljebotn DS (2009) Cython tutorial. In: Varoquaux G, van\u00a0der Walt S, Millman J (eds) Proceedings of the 8th python in science conference, Pasadena, CA USA pp 4\u201314","DOI":"10.25080\/MJMV8092"},{"key":"4932_CR17","unstructured":"NumPy Documentation: Version 1.21. The NumPy community (2021)"},{"key":"4932_CR18","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1016\/j.future.2021.07.021","volume":"125","author":"J L\u00f6ff","year":"2021","unstructured":"L\u00f6ff J, Griebler D, Mencagli G, Araujo G, Torquati M, Danelutto M, Fernandes LG (2021) The NAS Parallel Benchmarks for evaluating C++ parallel programming frameworks on shared-memory architectures. Futur Gener Comput Syst 125:743\u2013757. https:\/\/doi.org\/10.1016\/j.future.2021.07.021","journal-title":"Futur Gener Comput Syst"},{"key":"4932_CR19","doi-asserted-by":"crossref","unstructured":"Fenton NE, Bieman J (2014) Software metrics: a rigorous and practical approach, 3rd edn. Chapman & Hall\/CRC innovations in software engineering and software development series. CRC Press, Boca Raton","DOI":"10.1201\/b17461"},{"key":"4932_CR20","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1002\/cpe.1860","volume":"24","author":"M Malik","year":"2012","unstructured":"Malik M, Li T, Sharif U, Shahid R, El-Ghazawi TA, Newby GB (2012) Productivity of GPUs under different programming paradigms. Concurr Comput Pract Exp 24:179\u2013191","journal-title":"Concurr Comput Pract Exp"},{"key":"4932_CR21","unstructured":"Christgau S, Spazier J, Schnor B, Hammitzsch M, Babeyko A, Waechter J (2014) A comparison of CUDA and OpenACC: accelerating the tsunami simulation EasyWave. In: ARCS 2014; 2014 workshop proceedings on architecture of computing systems, pp 1\u20135"},{"key":"4932_CR22","doi-asserted-by":"publisher","unstructured":"Memeti S, Li L, Pllana S, Ko\u0142odziej J, Kessler C (2017) Benchmarking OpenCL, OpenACC, OpenMP, and CUDA: programming productivity, performance, and energy consumption. In: Proceedings of the 2017 workshop on adaptive resource management and scheduling for cloud computing. ARMS-CC\u201917, pp 1\u20136. ACM, New York, NY. https:\/\/doi.org\/10.1145\/3110355.3110356","DOI":"10.1145\/3110355.3110356"},{"key":"4932_CR23","doi-asserted-by":"publisher","unstructured":"Hoshino T, Maruyama N, Matsuoka S, Takaki R (2013) CUDA vs OpenACC: Performance case studies with kernel benchmarks and a memory-bound CFD application. In: 2013 13th IEEE\/ACM international symposium on cluster, cloud, and grid computing, pp 136\u2013143 https:\/\/doi.org\/10.1109\/CCGrid.2013.12","DOI":"10.1109\/CCGrid.2013.12"},{"key":"4932_CR24","doi-asserted-by":"publisher","unstructured":"Gimenes TL, Pisani F, Borin E (2018) Evaluating the performance and cost of accelerating seismic processing with CUDA, OpenCL, OpenACC, and OpenMP. In: 2018 IEEE international parallel and distributed processing symposium (IPDPS), pp 399\u2013408. https:\/\/doi.org\/10.1109\/IPDPS.2018.00050","DOI":"10.1109\/IPDPS.2018.00050"},{"key":"4932_CR25","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1504\/IJGUC.2019.099686","volume":"10","author":"VF Lima","year":"2019","unstructured":"Lima VF, Di Domenico JD (2019) HPSM: a programming framework to exploit multi-CPU and multi-GPU systems simultaneously. Int J Grid Util Comput 10:201. https:\/\/doi.org\/10.1504\/IJGUC.2019.099686","journal-title":"Int J Grid Util Comput"},{"key":"4932_CR26","doi-asserted-by":"publisher","unstructured":"Li L, Kessler C (2017) VectorPU: a generic and efficient data-container and component model for transparent data transfer on GPU-based heterogeneous systems. In: Proceedings of the 8th and 6th workshop on parallel programming and run-time management techniques for many-core architectures and design tools and architectures for multicore embedded comp. Platforms. PARMA-DITAM \u201917. ACM, NY, NY, USA, pp 7\u201312. https:\/\/doi.org\/10.1145\/3029580.3029582","DOI":"10.1145\/3029580.3029582"},{"key":"4932_CR27","doi-asserted-by":"publisher","unstructured":"Gong C, Liu J, Qin J, Hu Q, Gong Z (2010) Efficient embarrassingly parallel on graphics processor unit. In: 2010 2nd international conference on education technology and computer, vol 4, pp 4\u20134004404. https:\/\/doi.org\/10.1109\/ICETC.2010.5529656","DOI":"10.1109\/ICETC.2010.5529656"},{"key":"4932_CR28","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/978-3-642-30961-8_12","volume-title":"OpenMP in a heterogeneous world","author":"H Jin","year":"2012","unstructured":"Jin H, Kellogg M, Mehrotra P (2012) Using compiler directives for accelerating CFD applications on GPUs. In: Chapman BM, Massaioli F, M\u00fcller MS, Rorro M (eds) OpenMP in a heterogeneous world. Springer, Berlin, pp 154\u2013168"},{"key":"4932_CR29","doi-asserted-by":"publisher","unstructured":"Seo S, Jo G, Lee J (2011) Performance characterization of the NAS parallel benchmarks in OpenCL. In: 2011 IEEE international symposium on workload characterization (IISWC), pp 137\u2013148. https:\/\/doi.org\/10.1109\/IISWC.2011.6114174","DOI":"10.1109\/IISWC.2011.6114174"},{"key":"4932_CR30","first-page":"1","volume":"6","author":"X Li","year":"2016","unstructured":"Li X, Shih P-C, Overbey J, Seals C, Lim A (2016) Comparing programmer productivity in OpenACC and CUDA: an empirical investigation. Int J Comput Sci Eng Appl 6:1\u201315","journal-title":"Int J Comput Sci Eng Appl"},{"key":"4932_CR31","unstructured":"Kuan L, Neves J, Pratas F, Tom\u00e1s P, Sousa L (2014) Accelerating phylogenetic inference on GPUs: an OpenACC and CUDA comparison. In: Rojas I, Guzman FMO (eds) International work-conference on bioinformatics and biomedical engineering, IWBBIO 2014, Granada, Spain, April 7\u20139, 2014, pp 589\u2013600"},{"issue":"4","key":"4932_CR32","doi-asserted-by":"publisher","first-page":"1653","DOI":"10.1109\/JSTARS.2016.2516503","volume":"9","author":"X Guo","year":"2016","unstructured":"Guo X, Wu J, Wu Z, Huang B (2016) Parallel computation of aerial target reflection of background infrared radiation: performance comparison of OpenMP, OpenACC, and CUDA implementations. IEEE J Sel Topics Appl Earth Observ Remote Sens 9(4):1653\u20131662. https:\/\/doi.org\/10.1109\/JSTARS.2016.2516503","journal-title":"IEEE J Sel Topics Appl Earth Observ Remote Sens"},{"key":"4932_CR33","doi-asserted-by":"publisher","first-page":"1449","DOI":"10.1007\/s11227-017-2213-5","volume":"74","author":"A Marowka","year":"2018","unstructured":"Marowka A (2018) Python accelerators for high-performance computing. J Supercomput 74:1449\u20131460","journal-title":"J Supercomput"},{"key":"4932_CR34","doi-asserted-by":"publisher","unstructured":"Dogaru R, Dogaru I (2015) A low cost high performance computing platform for cellular nonlinear networks using Python for CUDA. In: 2015 20th international conference on control systems and computer science, pp 593\u2013598 https:\/\/doi.org\/10.1109\/CSCS.2015.36","DOI":"10.1109\/CSCS.2015.36"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-022-04932-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-022-04932-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-022-04932-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T00:54:00Z","timestamp":1728608040000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-022-04932-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,27]]},"references-count":34,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,5]]}},"alternative-id":["4932"],"URL":"https:\/\/doi.org\/10.1007\/s11227-022-04932-3","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2022,12,27]]},"assertion":[{"value":"6 November 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 December 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}