{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T03:37:03Z","timestamp":1772509023177,"version":"3.50.1"},"reference-count":43,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T00:00:00Z","timestamp":1771804800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001659","name":"Deutsche Forschungsgemeinschaft","doi-asserted-by":"publisher","award":["458578717"],"award-info":[{"award-number":["458578717"]}],"id":[{"id":"10.13039\/501100001659","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002301","name":"Eesti Teadusagentuur","doi-asserted-by":"publisher","award":["PUT PRG1467"],"award-info":[{"award-number":["PUT PRG1467"]}],"id":[{"id":"10.13039\/501100002301","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002347","name":"Bundesministerium f\u00fcr Forschung, Technologie und Raumfahrt","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002347","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of Systems Architecture"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.sysarc.2026.103736","type":"journal-article","created":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T16:21:54Z","timestamp":1771863714000},"page":"103736","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Integrating an open-source soft-GPU overlay with RISC-V control and high-bandwidth memory"],"prefix":"10.1016","volume":"175","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0891-235X","authenticated-orcid":false,"given":"Hector Gerardo","family":"Mu\u00f1oz Hernandez","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5405-992X","authenticated-orcid":false,"given":"Mahdi","family":"Taheri","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1810-3545","authenticated-orcid":false,"given":"Muhammad","family":"Ali","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6694-2821","authenticated-orcid":false,"given":"Keyvan","family":"Shahin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7316-8566","authenticated-orcid":false,"given":"Alireza","family":"Syavashi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2571-8441","authenticated-orcid":false,"given":"Diana","family":"G\u00f6hringer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9687-6247","authenticated-orcid":false,"given":"Marc","family":"Reichenbach","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8975-0171","authenticated-orcid":false,"given":"Christian","family":"Herglotz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3785-7959","authenticated-orcid":false,"given":"Michael","family":"H\u00fcbner","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"12","key":"10.1016\/j.sysarc.2026.103736_b1","first-page":"44","article-title":"Towards the use of artificial intelligence on the edge in space systems: Challenges and opportunities","volume":"35","author":"Furano","year":"2020","journal-title":"IEEE AESS"},{"key":"10.1016\/j.sysarc.2026.103736_b2","series-title":"ARC","first-page":"275","article-title":"Accelerating convolutional neural networks in FPGA-based SoCs using a soft-core GPU","author":"Munoz Hernandez","year":"2021"},{"key":"10.1016\/j.sysarc.2026.103736_b3","series-title":"ACM\/SIGDA","first-page":"165","article-title":"A statically and dynamically scalable soft GPGPU","author":"Langhammer","year":"2024"},{"key":"10.1016\/j.sysarc.2026.103736_b4","series-title":"FPL","first-page":"140","article-title":"DO-GPU: Domain optimizable soft GPUs","author":"Ma","year":"2021"},{"key":"10.1016\/j.sysarc.2026.103736_b5","series-title":"2026 Design, Automation and Test in Europe Conference","article-title":"HAWX: A hardware-aware framework for fast and scalable approximation of DNNs","author":"Nazari","year":"2026"},{"key":"10.1016\/j.sysarc.2026.103736_b6","series-title":"2024 25th International Symposium on Quality Electronic Design","first-page":"1","article-title":"Exploration of activation fault reliability in quantized systolic array-based dnn accelerators","author":"Taheri","year":"2024"},{"key":"10.1016\/j.sysarc.2026.103736_b7","series-title":"2023 24th International Symposium on Quality Electronic Design","first-page":"1","article-title":"Deepaxe: A framework for exploration of approximation and reliability trade-offs in dnn accelerators","author":"Taheri","year":"2023"},{"issue":"1","key":"10.1016\/j.sysarc.2026.103736_b8","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1109\/TDMR.2024.3523386","article-title":"Adam: Adaptive approximate multiplier for fault tolerance in dnn accelerators","volume":"25","author":"Taheri","year":"2024","journal-title":"IEEE Trans. Device Mater. Reliab."},{"key":"10.1016\/j.sysarc.2026.103736_b9","series-title":"Microprocessor, The Insider\u2019s Guide to Microprocessor Hardware","article-title":"Microblaze V7 gets an MMU","author":"Halfhill","year":"2007"},{"key":"10.1016\/j.sysarc.2026.103736_b10","series-title":"Volume I: Unprivileged ISA RISC-V","author":"Waterman","year":"2019"},{"key":"10.1016\/j.sysarc.2026.103736_b11","unstructured":"K. Asanovi\u0107, R. Avizienis, J. Bachrach, S. Beamer, D. Biancolin, C. Celio, H. Cook, D. Dabbelt, J. Hauser, A. Izraelevitz, S. Karandikar, B. Keller, D. Kim, J. Koenig, Y. Lee, E. Love, M. Maas, A. Magyar, H. Mao, M. Moreto, A. Ou, D.A. Patterson, B. Richards, C. Schmidt, S. Twigg, H. Vo, A. Waterman, The Rocket Chip Generator, Technical Report UCB\/EECS-2016-17, 2016."},{"issue":"11","key":"10.1016\/j.sysarc.2026.103736_b12","doi-asserted-by":"crossref","first-page":"2629","DOI":"10.1109\/TVLSI.2019.2926114","article-title":"The cost of application-class processing: Energy and performance analysis of a linux-ready 1.7-GHz 64-bit RISC-V Core in 22-nm FDSOI technology","volume":"27","author":"Zaruba","year":"2019","journal-title":"IEEE Trans. VLSI Syst."},{"issue":"10","key":"10.1016\/j.sysarc.2026.103736_b13","doi-asserted-by":"crossref","first-page":"2700","DOI":"10.1109\/TVLSI.2017.2654506","article-title":"Near-threshold RISC-V core with DSP extensions for scalable IoT endpoint devices","volume":"25","author":"Gautschi","year":"2017","journal-title":"IEEE Trans. VLSI Syst."},{"key":"10.1016\/j.sysarc.2026.103736_b14","series-title":"2024 IEEE 35th International Conference on Application-Specific Systems, Architectures and Processors","first-page":"77","article-title":"A FPGA-HBM-based hardware streaming accelerator for GNN sampling","author":"Gui","year":"2024"},{"key":"10.1016\/j.sysarc.2026.103736_b15","series-title":"2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines","first-page":"222","article-title":"Compiler-assisted kernel selection for FPGA-based near-memory computing platforms","author":"Iskandar","year":"2023"},{"key":"10.1016\/j.sysarc.2026.103736_b16","series-title":"Enhancing Single-Instruction Multiple-Threads FPGA-Based Processors with High-Bandwidth-Memory","author":"Hernandez","year":"2025"},{"key":"10.1016\/j.sysarc.2026.103736_b17","series-title":"ACM\/SIGDA","first-page":"254","article-title":"FGPU: An SIMT-architecture for FPGAs","author":"Al Kadi","year":"2016"},{"key":"10.1016\/j.sysarc.2026.103736_b18","series-title":"Architecture of Computing Systems","first-page":"94","article-title":"Towards complete open-source environments: fpga-based gpu overlay controlled by RISC-V","author":"Mu\u00f1oz-Hernandez","year":"2026"},{"key":"10.1016\/j.sysarc.2026.103736_b19","series-title":"FOSS flows for FPGA","author":"CHIPS Alliance","year":"2023"},{"key":"10.1016\/j.sysarc.2026.103736_b20","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1016\/j.jpdc.2021.10.007","article-title":"In-depth FPGA accelerator performance evaluation with single node benchmarks from the HPC challenge benchmark suite for Intel and Xilinx FPGAs using OpenCL","volume":"160","author":"Meyer","year":"2022","journal-title":"J. Parallel Distrib. Comput."},{"key":"10.1016\/j.sysarc.2026.103736_b21","series-title":"IPDPSW","first-page":"634","article-title":"Comparative analysis of executing GPU applications on FPGA: HLS vs. Soft GPU approaches","author":"Ahn","year":"2024"},{"key":"10.1016\/j.sysarc.2026.103736_b22","series-title":"FPT","first-page":"57","article-title":"Guppy: A GPU-like soft-core processor","author":"Al-Dujaili","year":"2012"},{"issue":"2","key":"10.1016\/j.sysarc.2026.103736_b23","article-title":"Enabling GPGPU low-level hardware explorations with MIAOW: An open-source RTL implementation of a GPGPU","volume":"12","author":"Balasubramanian","year":"2015","journal-title":"ACM Trans."},{"key":"10.1016\/j.sysarc.2026.103736_b24","series-title":"MICRO","first-page":"165","article-title":"SCRATCH: An end-to-end application-aware so-GPGPU architecture and trimming tool","author":"Duarte","year":"2017"},{"key":"10.1016\/j.sysarc.2026.103736_b25","series-title":"FPT","article-title":"FlexGrip: A soft GPGPU for FPGAs","author":"Andryc","year":"2013"},{"key":"10.1016\/j.sysarc.2026.103736_b26","series-title":"LATS","first-page":"93","article-title":"Evaluating the effects of single event upsets in soft-core GPGPUs","author":"Nedel","year":"2016"},{"key":"10.1016\/j.sysarc.2026.103736_b27","series-title":"FPL","first-page":"277","article-title":"eGPU: A 750 MHz class soft GPGPU for FPGA","author":"Langhammer","year":"2023"},{"key":"10.1016\/j.sysarc.2026.103736_b28","series-title":"SBCCI","first-page":"1","article-title":"Edge GPU based on an FPGA overlay architecture using PYNQ","author":"Hernandez","year":"2022"},{"key":"10.1016\/j.sysarc.2026.103736_b29","series-title":"FPL","first-page":"326","article-title":"Specializing FGPU for persistent deep learning","author":"Ma","year":"2019"},{"key":"10.1016\/j.sysarc.2026.103736_b30","series-title":"PRIME","first-page":"181","article-title":"Exploiting FPGA dynamic partial reconfiguration for a soft GPU-based system-on-chip","author":"Monopoli","year":"2023"},{"key":"10.1016\/j.sysarc.2026.103736_b31","series-title":"EDHPC","first-page":"1","article-title":"Exploring key aspects of soft GPGPU computing for on-board acceleration of artificial intelligence algorithms in space applications","author":"Monopoli","year":"2023"},{"key":"10.1016\/j.sysarc.2026.103736_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.microrel.2021.114348","article-title":"Evaluating softcore GPU in SRAM-based FPGA under radiation-induced effects","author":"Braga","year":"2021","journal-title":"Microelectron. Reliab."},{"key":"10.1016\/j.sysarc.2026.103736_b33","series-title":"ICECS","first-page":"1","article-title":"Investigating floating-point implementations in a softcore GPU under radiation-induced faults","author":"Goncalves","year":"2020"},{"key":"10.1016\/j.sysarc.2026.103736_b34","series-title":"FPL","article-title":"AITIA: Embedded AI techniques for industrial applications","author":"Brandalero","year":"2021"},{"key":"10.1016\/j.sysarc.2026.103736_b35","series-title":"PRIME","first-page":"177","article-title":"Enhanced soft GPU architecture for FPGAs","author":"Todaro","year":"2023"},{"key":"10.1016\/j.sysarc.2026.103736_b36","series-title":"AMD\/Xilinx Vivado Block Designs for FPGA RISC-V SoC Running Debian Linux distro","author":"Tarassov","year":"2024"},{"key":"10.1016\/j.sysarc.2026.103736_b37","series-title":"FPT","first-page":"28","article-title":"Integer computations with soft GPGPU on FPGAs","author":"Al Kadi","year":"2016"},{"key":"10.1016\/j.sysarc.2026.103736_b38","series-title":"Alveo U55c data sheet (DS978)","author":"AMD\/Xilinx","year":"2023"},{"key":"10.1016\/j.sysarc.2026.103736_b39","series-title":"Alveo U200 and U250 data sheet (DS962)","author":"AMD\/Xilinx","year":"2023"},{"issue":"2","key":"10.1016\/j.sysarc.2026.103736_b40","article-title":"Implementation floyd-warhsall algorithm for the shortest path of garage","volume":"3","author":"Triana","year":"2018","journal-title":"Int. J. Innov. Sci. Res. Technol."},{"key":"10.1016\/j.sysarc.2026.103736_b41","series-title":"Cortex - A9 technical reference manual","author":"ARM","year":"2008"},{"key":"10.1016\/j.sysarc.2026.103736_b42","series-title":"ZC706 evaluation board for the zynq-700 XC7z046 SoC user guide (UG954)","author":"AMD\/Xilinx","year":"2019"},{"key":"10.1016\/j.sysarc.2026.103736_b43","series-title":"Embedded Computer Systems: Architectures, Modeling, and Simulation","first-page":"72","article-title":"A novel system simulation framework for HBM2 FPGA platforms","author":"Mu\u00f1oz Hernandez","year":"2025"}],"container-title":["Journal of Systems Architecture"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1383762126000548?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1383762126000548?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T02:11:37Z","timestamp":1772503897000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1383762126000548"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":43,"alternative-id":["S1383762126000548"],"URL":"https:\/\/doi.org\/10.1016\/j.sysarc.2026.103736","relation":{},"ISSN":["1383-7621"],"issn-type":[{"value":"1383-7621","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Integrating an open-source soft-GPU overlay with RISC-V control and high-bandwidth memory","name":"articletitle","label":"Article Title"},{"value":"Journal of Systems Architecture","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.sysarc.2026.103736","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"103736"}}