{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T02:03:34Z","timestamp":1780020214552,"version":"3.53.1"},"reference-count":46,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T00:00:00Z","timestamp":1774224000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006132","name":"Office of Science","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006132","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006192","name":"Advanced Scientific Computing Research","doi-asserted-by":"publisher","award":["FWP ERKJ452"],"award-info":[{"award-number":["FWP ERKJ452"]}],"id":[{"id":"10.13039\/100006192","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006192","name":"Advanced Scientific Computing Research","doi-asserted-by":"publisher","award":["DE-SC0025645"],"award-info":[{"award-number":["DE-SC0025645"]}],"id":[{"id":"10.13039\/100006192","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006192","name":"Advanced Scientific Computing Research","doi-asserted-by":"publisher","award":["DE-FOA-0003264"],"award-info":[{"award-number":["DE-FOA-0003264"]}],"id":[{"id":"10.13039\/100006192","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Future Generation Computer Systems"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.future.2026.108479","type":"journal-article","created":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:48:42Z","timestamp":1773967722000},"page":"108479","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Accl++ : A high-productivity programming language for performance and code portability on heterogeneous systems"],"prefix":"10.1016","volume":"182","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3780-1106","authenticated-orcid":false,"given":"Marc","family":"Gonzalez","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7531-6374","authenticated-orcid":false,"given":"Pedro","family":"Valero","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3419-4037","authenticated-orcid":false,"given":"M.A.H.","family":"Monil","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8872-4932","authenticated-orcid":false,"given":"Seyong","family":"Lee","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5426-1415","authenticated-orcid":false,"given":"Beau","family":"Johnston","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5448-4667","authenticated-orcid":false,"given":"Aaron","family":"Young","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8259-8891","authenticated-orcid":false,"given":"Narasinga Rao","family":"Miniskar","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6647-2690","authenticated-orcid":false,"given":"Keita","family":"Teranishi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2449-6720","authenticated-orcid":false,"given":"Jeff","family":"Vetter","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.future.2026.108479_bib0001","unstructured":"HIPIFY documentation, 2024, [Online; accessed 12-Nov-2024], https:\/\/rocm.docs.amd.com\/projects\/HIPIFY\/en\/latest\/."},{"key":"10.1016\/j.future.2026.108479_bib0002","unstructured":"C++ Programming for Heterogeneous Parallel Computing, 2024, [Online; accessed 12-Nov-2024], https:\/\/www.khronos.org\/api\/index_2017\/sycl."},{"issue":"4","key":"10.1016\/j.future.2026.108479_bib0003","doi-asserted-by":"crossref","first-page":"805","DOI":"10.1109\/TPDS.2021.3097283","article-title":"Kokkos 3: programming model extensions for the exascale era","volume":"33","author":"Trott","year":"2022","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"10.1016\/j.future.2026.108479_bib0004","series-title":"Proceedings of the 21st International Workshop on Languages and Compilers for Parallel Computing","first-page":"16","article-title":"MCUDA: an effective implementation of CUDA kernels for multi-core CPUs","author":"Stratton","year":"2008"},{"key":"10.1016\/j.future.2026.108479_bib0005","unstructured":"C. Lattner, J.A. Pienaar, M. Amini, U. Bondhugula, R. Riddle, A. Cohen, T. Shpeisman, A. Davis, N. Vasilache, O. Zinenko, MLIR: a compiler infrastructure for the end of Moore\u2019s law, CoRR abs\/2002.11054(2020). arXiv: 2002.11054."},{"key":"10.1016\/j.future.2026.108479_bib0006","series-title":"2024 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","first-page":"667","article-title":"eCC++ : a compiler construction framework for embedded domain-specific languages","author":"Tallada","year":"2024"},{"key":"10.1016\/j.future.2026.108479_bib0007","series-title":"2021 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO)","first-page":"39","article-title":"BuildIt: a type-based multi-stage programming framework for code generation in c++","author":"Brahmakshatriya","year":"2021"},{"key":"10.1016\/j.future.2026.108479_bib0008","series-title":"Proceedings of the SC \u201923 Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis","first-page":"1697","article-title":"Moment representation of regularized lattice boltzmann methods on NVIDIA and AMD GPUs","author":"Valero-Lara","year":"2023"},{"key":"10.1016\/j.future.2026.108479_bib0009","unstructured":"Cuda sample blackscholes, 2024a, [Online; accessed 03-Jul-2025], https:\/\/github.com\/NVIDIA\/cuda-samples\/tree\/master\/Samples\/5_Domain_Specific\/BlackScholes."},{"key":"10.1016\/j.future.2026.108479_bib0010","unstructured":"Cuda sample blackscholes, 2024b, [Online; accessed 03-Jul-2025], https:\/\/developer.nvidia.com\/gpugems\/gpugems2\/part-vi-simulation-and-numerical-algorithms\/chapter-45-options-pricing-gpu."},{"key":"10.1016\/j.future.2026.108479_bib0011","unstructured":"Bristol university docking engine application, 2024, [Online; accessed 12-Nov-2024], https:\/\/github.com\/UoB-HPC\/miniBUDE."},{"key":"10.1016\/j.future.2026.108479_bib0012","series-title":"International Conference on High Performance Computing","first-page":"332","article-title":"A performance analysis of modern parallel programming models using a compute-bound application","author":"Poenaru","year":"2021"},{"key":"10.1016\/j.future.2026.108479_bib0013","series-title":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","article-title":"Evaluating and optimizing openCL kernels for high performance computing with FPGAs","author":"Zohouri","year":"2016"},{"key":"10.1016\/j.future.2026.108479_bib0014","doi-asserted-by":"crossref","first-page":"994","DOI":"10.1016\/j.procs.2011.04.105","article-title":"Optimization of multi-phase compressible lattice boltzmann codes on massively parallel multi-core systems","volume":"4","author":"Biferale","year":"2011","journal-title":"Procedia Comput. Sci."},{"issue":"1","key":"10.1016\/j.future.2026.108479_bib0015","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1002\/cpe.1466","article-title":"A flexible high-performance lattice Boltzmann GPU code for the simulations of fluid flows in complex geometries","volume":"22","author":"Bernaschi","year":"2010","journal-title":"Concurr. Comput. Pract. Exper."},{"key":"10.1016\/j.future.2026.108479_bib0016","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1016\/j.simpat.2012.03.004","article-title":"A lattice-Boltzmann solver for 3D fluid simulation on GPU","volume":"25","author":"Rinaldi","year":"2012","journal-title":"Simul. Modell. Pract. Theory"},{"issue":"6","key":"10.1016\/j.future.2026.108479_bib0017","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1016\/j.parco.2013.04.001","article-title":"Scalable lattice Boltzmann solvers for CUDA GPU clusters","volume":"39","author":"Obrecht","year":"2013","journal-title":"Parallel Comput."},{"issue":"9","key":"10.1016\/j.future.2026.108479_bib0018","doi-asserted-by":"crossref","first-page":"2350","DOI":"10.1016\/j.cpc.2014.04.018","article-title":"Sailfish: a flexible multi-GPU implementation of the lattice Boltzmann method","volume":"185","author":"Januszewski","year":"2014","journal-title":"Comput. Phys. Commun."},{"key":"10.1016\/j.future.2026.108479_bib0019","doi-asserted-by":"crossref","unstructured":"K.O.W. Group, The C++ for openCL 1.0 and 2021 programming language documentation, 2022a, https:\/\/www.khronos.org\/opencl\/assets\/CXX_for_OpenCL.html.","DOI":"10.1145\/3529538.3529981"},{"key":"10.1016\/j.future.2026.108479_bib0020","unstructured":"K.O.W. Group, OpenCL C++ bindings, 2022b, https:\/\/github.khronos.org\/OpenCL-CLHPP\/."},{"key":"10.1016\/j.future.2026.108479_bib0021","unstructured":"O.A.R. Board, OpenMP application program interface version 6.0, 2024, https:\/\/www.openmp.org\/wp-content\/uploads\/OpenMP-API-Specification-6-0.pdf."},{"key":"10.1016\/j.future.2026.108479_bib0022","unstructured":"O.A.R. Board, OpenACC application program interface version 3.3, 2022, https:\/\/www.openacc.org\/sites\/default\/files\/inline-images\/Specification\/OpenACC-3.3-final.pdf."},{"issue":"3","key":"10.1016\/j.future.2026.108479_bib0023","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1109\/MCSE.2010.69","article-title":"OpenCL: a parallel programming standard for heterogeneous computing systems","volume":"12","author":"Stone","year":"2010","journal-title":"Comput. Sci. Eng."},{"issue":"1","key":"10.1016\/j.future.2026.108479_bib0024","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1145\/3200691.3178493","article-title":"HPVM: heterogeneous parallel virtual machine","volume":"53","author":"Kotsifakou","year":"2018","journal-title":"SIGPLAN Not."},{"key":"10.1016\/j.future.2026.108479_bib0025","series-title":"Proceedings of the 25th IEEE High Performance Extreme Computing Conference","first-page":"1","article-title":"IRIS: a portable runtime system exploiting multiple heterogeneous programming systems","author":"Kim","year":"2021"},{"key":"10.1016\/j.future.2026.108479_bib0026","series-title":"C++ Templates","author":"Vandevoorde","year":"2002"},{"key":"10.1016\/j.future.2026.108479_bib0027","doi-asserted-by":"crossref","unstructured":"T. Sheard, S.P. Jones, Template meta-programming for haskellin: Proceedings of the 2002 ACM SIGPLAN Workshop on Haskell, Association for Computing Machinery, Pittsburgh, Pennsylvania, (2002) 1\u20131610.1145\/581690.581691.","DOI":"10.1145\/581690.581691"},{"issue":"OOPSLA","key":"10.1016\/j.future.2026.108479_bib0028","doi-asserted-by":"crossref","DOI":"10.1145\/3133901","article-title":"The tensor algebra compiler","volume":"1","author":"Kjolstad","year":"2017","journal-title":"Proc. ACM Program. Lang."},{"key":"10.1016\/j.future.2026.108479_bib0029","series-title":"Proceedings of the 2019 IEEE\/ACM International Symposium on Code Generation and Optimization","first-page":"193","article-title":"Tiramisu: a polyhedral compiler for expressing fast and portable code","author":"Baghdadi","year":"2019"},{"issue":"6","key":"10.1016\/j.future.2026.108479_bib0030","doi-asserted-by":"crossref","first-page":"519","DOI":"10.1145\/2499370.2462176","article-title":"Halide: a language and compiler for optimizing parallelism, locality, and recomputation in image processing pipelines","volume":"48","author":"Ragan-Kelley","year":"2013","journal-title":"SIGPLAN Not."},{"key":"10.1016\/j.future.2026.108479_bib0031","series-title":"International Symposium on Code Generation and Optimization (CGO 2011)","first-page":"224","article-title":"Intel\u2019s array building blocks: a retargetable, dynamic compiler and embedded language","author":"Newburn","year":"2011"},{"key":"10.1016\/j.future.2026.108479_bib0032","series-title":"Proceedings of the 34th ACM SIGPLAN Conference on Programming Language Design and Implementation","first-page":"105","article-title":"Terra: a multi-stage language for high-performance computing","author":"DeVito","year":"2013"},{"issue":"OOPSLA","key":"10.1016\/j.future.2026.108479_bib0033","doi-asserted-by":"crossref","DOI":"10.1145\/3276491","article-title":"GraphIt: a high-performance graph DSL","volume":"2","author":"Zhang","year":"2018","journal-title":"Proc. ACM Program. Lang."},{"issue":"2","key":"10.1016\/j.future.2026.108479_bib0034","doi-asserted-by":"crossref","DOI":"10.1145\/2866569","article-title":"Simit: a language for physical simulation","volume":"35","author":"Kjolstad","year":"2016","journal-title":"ACM Trans. Graph."},{"issue":"2","key":"10.1016\/j.future.2026.108479_bib0035","doi-asserted-by":"crossref","DOI":"10.1088\/2058-9565\/ab6bf6","article-title":"XACC: a system-level software infrastructure for heterogeneous quantum\u2013classical computing","volume":"5","author":"McCaskey","year":"2020","journal-title":"Quantum Sci. Technol."},{"issue":"OOPSLA","key":"10.1016\/j.future.2026.108479_bib0036","doi-asserted-by":"crossref","DOI":"10.1145\/3276489","article-title":"AnyDSL: a partial evaluation framework for programming high-performance libraries","volume":"2","author":"Lei\u00dfa","year":"2018","journal-title":"Proc. ACM Program. Lang."},{"key":"10.1016\/j.future.2026.108479_bib0037","series-title":"SC \u201912: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis","first-page":"1","article-title":"Legion: expressing locality and independence with logical regions","author":"Bauer","year":"2012"},{"key":"10.1016\/j.future.2026.108479_bib0038","series-title":"SC \u201906: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing","first-page":"4","article-title":"Sequoia: programming the memory hierarchy","author":"Fatahalian","year":"2006"},{"key":"10.1016\/j.future.2026.108479_bib0039","series-title":"Proceedings of the 30th ACM SIGPLAN Conference on Programming Language Design and Implementation","first-page":"38","article-title":"PetaBricks: a language and compiler for algorithmic choice","author":"Ansel","year":"2009"},{"issue":"8","key":"10.1016\/j.future.2026.108479_bib0040","doi-asserted-by":"crossref","DOI":"10.1145\/3016078.2851178","article-title":"A programming system for future proofing performance critical libraries","volume":"51","author":"Chang","year":"2016","journal-title":"SIGPLAN Not."},{"issue":"OOPSLA","key":"10.1016\/j.future.2026.108479_bib0041","doi-asserted-by":"crossref","DOI":"10.1145\/3360552","article-title":"Staged abstract interpreters: fast and modular whole-program analysis via meta-programming","volume":"3","author":"Wei","year":"2019","journal-title":"Proc. ACM Program. Lang."},{"key":"10.1016\/j.future.2026.108479_bib0042","series-title":"A Gentle Introduction to Multi-stage Programming","first-page":"30","author":"Taha","year":"2004"},{"key":"10.1016\/j.future.2026.108479_bib0043","series-title":"Technical Report","article-title":"The SUIF Compiler System: A Parallelizing and Optimizing Research Compiler","author":"Wilson","year":"1994"},{"key":"10.1016\/j.future.2026.108479_bib0044","unstructured":"S.C. Group, The builder library, a tool to construct or modify suif code within the suif compiler, 2022, https:\/\/suif.stanford.edu\/suif\/suif1\/docs\/builder_toc.html."},{"key":"10.1016\/j.future.2026.108479_bib0045","series-title":"Proceedings of the Ninth International Conference on Generative Programming and Component Engineering","first-page":"127","article-title":"Lightweight modular staging: a pragmatic approach to runtime code generation and compiled DSLs","author":"Rompf","year":"2010"},{"issue":"4s","key":"10.1016\/j.future.2026.108479_bib0046","doi-asserted-by":"crossref","DOI":"10.1145\/2584665","article-title":"Delite: a compiler architecture for performance-oriented embedded domain-specific languages","volume":"13","author":"Sujeeth","year":"2014","journal-title":"ACM Trans. Embed. Comput. Syst."}],"container-title":["Future Generation Computer Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167739X26001135?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167739X26001135?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T01:38:51Z","timestamp":1780018731000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167739X26001135"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":46,"alternative-id":["S0167739X26001135"],"URL":"https:\/\/doi.org\/10.1016\/j.future.2026.108479","relation":{},"ISSN":["0167-739X"],"issn-type":[{"value":"0167-739X","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Accl++ : A high-productivity programming language for performance and code portability on heterogeneous systems","name":"articletitle","label":"Article Title"},{"value":"Future Generation Computer Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.future.2026.108479","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"108479"}}