{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:30:50Z","timestamp":1766269850449,"version":"build-2065373602"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2019,4,8]],"date-time":"2019-04-08T00:00:00Z","timestamp":1554681600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["779877"],"award-info":[{"award-number":["779877"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010198","name":"Ministerio de Econom\u00eda, Industria y Competitividad, Gobierno de Espa\u00f1a","doi-asserted-by":"publisher","award":["FJCI-2015-24753"],"award-info":[{"award-number":["FJCI-2015-24753"]}],"id":[{"id":"10.13039\/501100010198","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["671697"],"award-info":[{"award-number":["671697"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010198","name":"Ministerio de Econom\u00eda, Industria y Competitividad, Gobierno de Espa\u00f1a","doi-asserted-by":"publisher","award":["RYC-2016-21104"],"award-info":[{"award-number":["RYC-2016-21104"]}],"id":[{"id":"10.13039\/501100010198","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006531","name":"Departament d\u2019Universitats, Recerca i Societat de la Informaci\u00f3","doi-asserted-by":"publisher","award":["2013 BP B 00243"],"award-info":[{"award-number":["2013 BP B 00243"]}],"id":[{"id":"10.13039\/501100006531","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1007\/s11227-019-02842-5","type":"journal-article","created":{"date-parts":[[2019,4,8]],"date-time":"2019-04-08T09:03:04Z","timestamp":1554714184000},"page":"2039-2062","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Using Arm\u2019s scalable vector extension on stencil codes"],"prefix":"10.1007","volume":"76","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2869-668X","authenticated-orcid":false,"given":"Adri\u00e0","family":"Armejach","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Helena","family":"Caminal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Juan M.","family":"Cebrian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rub\u00e9n","family":"Langarita","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rekai","family":"Gonz\u00e1lez-Alberquilla","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chris","family":"Adeniyi-Jones","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mateo","family":"Valero","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marc","family":"Casas","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Miquel","family":"Moret\u00f3","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,4,8]]},"reference":[{"issue":"2","key":"2842_CR1","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MM.2017.35","volume":"37","author":"N Stephens","year":"2017","unstructured":"Stephens N, Biles S, Boettcher M, Eapen J, Eyole M, Gabrielli G, Horsnell M, Magklis G, Martinez A, Pr\u00e9millieu N, Reid A, Rico A, Walker P (2017) The ARM scalable vector extension. IEEE Micro 37(2):26\u201339. \nhttps:\/\/doi.org\/10.1109\/MM.2017.35\n\n [Online]","journal-title":"IEEE Micro"},{"key":"2842_CR2","unstructured":"Yoshida T (2016) Introduction of Fujitsu\u2019s HPC processor for the post-K computer. In: Hot Chips 28 Symposium (HCS), Ser. Hot Chips\u201916. IEEE"},{"key":"2842_CR3","doi-asserted-by":"publisher","unstructured":"Datta K, Murphy M, Volkov V, Williams S, Carter J, Oliker L, Patterson DA, Shalf J, Yelick KA (2008) Stencil computation optimization and auto-tuning on state-of-the-art multicore architectures. In: Proceedings of the ACM\/IEEE Conference on High Performance Computing, SC 2008, November 15\u201321, 2008, Austin, Texas, USA, p. 4. \nhttps:\/\/doi.org\/10.1145\/1413370.1413375","DOI":"10.1145\/1413370.1413375"},{"key":"2842_CR4","doi-asserted-by":"publisher","unstructured":"Yount C, Tobin J, Breuer A, Duran A (2016) YASK\u2013yet another stencil kernel: A framework for HPC stencil code-generation and tuning. In: Sixth International Workshop on Domain-Specific Languages and High-Level Frameworks for High Performance Computing, WOLFHPC@SC 2016, Salt Lake, UT, USA, November 14, 2016, pp 30\u201339. \nhttps:\/\/doi.org\/10.1109\/WOLFHPC.2016.08","DOI":"10.1109\/WOLFHPC.2016.08"},{"key":"2842_CR5","doi-asserted-by":"publisher","unstructured":"Frigo M, Strumpen V (2005) Cache oblivious stencil computations. In: Proceedings of the 19th Annual International Conference on Supercomputing, ICS 2005, Cambridge, MA, USA, June 20\u201322, pp 361\u2013366. \nhttps:\/\/doi.org\/10.1145\/1088149.1088197","DOI":"10.1145\/1088149.1088197"},{"key":"2842_CR6","volume-title":"Multigrid","author":"U Trottenberg","year":"2000","unstructured":"Trottenberg U, Oosterlee CW, Schuller A (2000) Multigrid. Academic Press, Cambridge"},{"issue":"1","key":"2842_CR7","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1134\/S0361768815010077","volume":"41","author":"VT Zhukov","year":"2015","unstructured":"Zhukov VT, Krasnov MM, Novikova ND, Feodoritova OB (2015) Multigrid effectiveness on modern computing architectures. Program Comput Softw 41(1):14\u201322. \nhttps:\/\/doi.org\/10.1134\/S0361768815010077","journal-title":"Program Comput Softw"},{"issue":"20","key":"2842_CR8","doi-asserted-by":"publisher","first-page":"7692","DOI":"10.1016\/j.jcp.2010.06.024","volume":"229","author":"D Komatitsch","year":"2010","unstructured":"Komatitsch D, Erlebacher G, G\u00f6ddeke D, Mich\u00e9a D (2010) High-order finite-element seismic wave propagation modeling with MPI on a large GPU cluster. J Comput Phys 229(20):7692\u20137714. \nhttps:\/\/doi.org\/10.1016\/j.jcp.2010.06.024","journal-title":"J Comput Phys"},{"issue":"2","key":"2842_CR9","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1016\/j.pnucene.2010.09.011","volume":"53","author":"A Heimlich","year":"2011","unstructured":"Heimlich A, Mol A, Pereira C (2011) Gpu-based monte carlo simulation in neutron transport and finite differences heat equation evaluation. Prog Nucl Energy 53(2):229\u2013239","journal-title":"Prog Nucl Energy"},{"issue":"1","key":"2842_CR10","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.chemolab.2011.03.009","volume":"108","author":"F Moln\u00e1r","year":"2011","unstructured":"Moln\u00e1r F, Izs\u00e1k F, M\u00e9sz\u00e1ros R, Lagzi I (2011) Simulation of reaction\u2013diffusion processes in three dimensions using cuda. Chemom Intell Lab Syst 108(1):76\u201385","journal-title":"Chemom Intell Lab Syst"},{"key":"2842_CR11","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/1763.001.0001","volume-title":"Cellular automata machines-a new environment for modeling","author":"T Toffoli","year":"1987","unstructured":"Toffoli T, Margolus N (1987) Cellular automata machines-a new environment for modeling. MIT Press, Cambridge"},{"key":"2842_CR12","doi-asserted-by":"publisher","unstructured":"Espasa R, Valero M, Smith J.E (1998) Vector architectures: past, present and future. In: Proceedings of the 12th International Conference on Supercomputing, ICS 1998, Melbourne, Australia, July 13\u201317, pp 425\u2013432. \nhttps:\/\/doi.org\/10.1145\/277830.277935","DOI":"10.1145\/277830.277935"},{"key":"2842_CR13","unstructured":"Intel Architecture instruction set extensions programming reference. Intel Corporation (2016). \nhttps:\/\/software.intel.com\/sites\/default\/files\/managed\/c5\/15\/architecture-instruction-set-extensions-programming-reference.pdf"},{"key":"2842_CR14","unstructured":"Fuller S (1998) Motorola\u2019s altivec\u2122 technology. Motorola Inc., Tech. Rep. \nhttp:\/\/www.nxp.com\/assets\/documents\/data\/en\/fact-sheets\/ALTIVECWP.pdf"},{"key":"2842_CR15","unstructured":"Lee Y, Schmidt C, Ou A, Waterman A, Asanovic\u0300 K (2015) The hwacha vector-fetch architecture manual. In: Electrical Engineering and Computer Sciences, University of California at Berkeley, Tech. Rep., \nhttps:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2015\/EECS-2015-262.pdf"},{"key":"2842_CR16","doi-asserted-by":"crossref","unstructured":"Waterman A, Lee Y, Patterson D, Asanovic\u0300 K (2014) The risc-v instruction set manual, volume i: user-level ISA, version 2.0. Electrical Engineering and Computer Sciences, University of California at Berkeley, Tech. Rep. \nhttps:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2014\/EECS-2014-54.pdf","DOI":"10.21236\/ADA605735"},{"key":"2842_CR17","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1145\/359327.359336","volume":"21","author":"RM Russell","year":"1978","unstructured":"Russell RM (1978) The cray-1 computer system. Commun ACM 21:63\u201372","journal-title":"Commun ACM"},{"key":"2842_CR18","first-page":"377","volume-title":"High performance parallelism pearls, multicore and many-core programming approaches","author":"J Reinders","year":"2014","unstructured":"Reinders J, Jeffers J (2014) High performance parallelism pearls, multicore and many-core programming approaches. Morgan Kaufmann, Bulington, pp 377\u2013396"},{"key":"2842_CR19","first-page":"75","volume":"2014","author":"S Kronawitter","year":"2014","unstructured":"Kronawitter S, Lengauer C (2014) Optimization of two jacobi smoother kernels by domain-specific program transformation. HiStencils 2014:75\u201380","journal-title":"HiStencils"},{"key":"2842_CR20","doi-asserted-by":"crossref","unstructured":"Christen M, Schenk O, Burkhart H (May 2011) PATUS: a code generation and autotuning framework for parallel iterative stencil computations on modern microarchitectures. In: 25th IEEE International Symposium on Parallel and Distributed Processing, IPDPS 2011\u2013Conference Proceedings, pp 676\u2013687","DOI":"10.1109\/IPDPS.2011.70"},{"key":"2842_CR21","first-page":"51","volume":"14","author":"L Szustak","year":"2014","unstructured":"Szustak L, Rojek K, Wyrzykowski R, Gepner P (2014) Toward efficient distribution of MPDATA stencil computation on Intel MIC architecture. Proce. HiStencils 14:51\u201356","journal-title":"Proce. HiStencils"},{"key":"2842_CR22","doi-asserted-by":"publisher","unstructured":"Kamil S, Chan CP, Oliker L, Shalf J, Williams S (2010) An auto-tuning framework for parallel multicore stencil computations. In: 24th IEEE International Symposium on Parallel and Distributed Processing, IPDPS 2010, Atlanta, Georgia, USA, 19\u201323 April 2010\u2013Conference Proceedings, pp 1\u201312. \nhttps:\/\/doi.org\/10.1109\/IPDPS.2010.5470421","DOI":"10.1109\/IPDPS.2010.5470421"},{"key":"2842_CR23","doi-asserted-by":"publisher","unstructured":"Kamil S, Husbands P, Oliker L, Shalf J, Yelick K.A (2005) Impact of modern memory subsystems on cache optimizations for stencil computations. In: Proceedings of the 2005 Workshop on Memory System Performance, Chicago, Illinois, USA, June 12, 2005, pp 36\u201343. \nhttps:\/\/doi.org\/10.1145\/1111583.1111589","DOI":"10.1145\/1111583.1111589"},{"key":"2842_CR24","doi-asserted-by":"publisher","unstructured":"Kamil S, Datta K, Williams S, Oliker L, Shalf J, Yelick KA (2006) Implicit and explicit optimizations for stencil computations. In: Proceedings of the 2006 Workshop on Memory System Performance and Correctness, San Jose, CA, USA, October 11, 2006, pp 51\u201360. \nhttps:\/\/doi.org\/10.1145\/1178597.1178605","DOI":"10.1145\/1178597.1178605"},{"key":"2842_CR25","doi-asserted-by":"publisher","unstructured":"Tang Y, Chowdhury RA, Kuszmaul BC, Luk C, Leiserson CE (2011) The pochoir stencil compiler. In: SPAA 2011: Proceedings of the 23rd Annual ACM Symposium on Parallelism in Algorithms and Architectures, San Jose, CA, USA, June 4\u20136, 2011 (Co-located with FCRC 2011), pp 117\u2013128. \nhttps:\/\/doi.org\/10.1145\/1989493.1989508","DOI":"10.1145\/1989493.1989508"},{"key":"2842_CR26","doi-asserted-by":"publisher","unstructured":"Dursun H, Nomura K, Peng L, Seymour R, Wang W, Kalia RK, Nakano A, Vashishta P (2009) A multilevel parallelization framework for high-order stencil computations. In: Euro-Par 2009 Parallel Processing, 15th International Euro-Par Conference, Delft, The Netherlands, August 25\u201328, 2009. Proceedings, pp 642\u2013653. \nhttps:\/\/doi.org\/10.1007\/978-3-642-03869-3_61","DOI":"10.1007\/978-3-642-03869-3_61"},{"key":"2842_CR27","doi-asserted-by":"publisher","unstructured":"Peng L, Seymour R, Nomura K, Kalia RK, Nakano A, Vashishta P, Loddoch A, Netzband M, Volz W.R, Wong CC (2009) High-order stencil computations on multicore clusters. In: 23rd IEEE International Symposium on Parallel and Distributed Processing, IPDPS 2009, Rome, Italy, May 23\u201329, 2009, pp. 1\u201311. \nhttps:\/\/doi.org\/10.1109\/IPDPS.2009.5161011","DOI":"10.1109\/IPDPS.2009.5161011"},{"key":"2842_CR28","doi-asserted-by":"publisher","unstructured":"Maruyama N, Nomura T, Sato K, Matsuoka S (2011) Physis: an implicitly parallel programming model for stencil computations on large-scale gpu-accelerated supercomputers. In: Conference on High Performance Computing Networking, Storage and Analysis, SC 2011, Seattle, WA, USA, November 12\u201318, 2011, pp 11:1\u201311:12. \nhttps:\/\/doi.org\/10.1145\/2063384.2063398","DOI":"10.1145\/2063384.2063398"},{"key":"2842_CR29","doi-asserted-by":"crossref","unstructured":"Yount C (2015) Vector folding: improving stencil performance via multi-dimensional simd-vector representation. In: IEEE 17th International Conference on High Performance Computing and Communications, pp 865\u2013870","DOI":"10.1109\/HPCC-CSS-ICESS.2015.27"},{"key":"2842_CR30","volume-title":"Improving performance via mini-applications","author":"MA Heroux","year":"2009","unstructured":"Heroux MA, Doerfler DW, Crozier PS, Willenbring JM, Edwards HC, Williams A, Rajan M, Keiter ER, Thornquist HK, Numrich RW (2009) Improving performance via mini-applications. Sandia National Laboratories, Albuquerque"},{"key":"2842_CR31","unstructured":"The mantevo suite release version 3.0. \nhttps:\/\/mantevo.org\/download\/"},{"issue":"2","key":"2842_CR32","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2024716.2024718","volume":"39","author":"N Binkert","year":"2011","unstructured":"Binkert N, Beckmann B, Black G, Reinhardt SK, Saidi A, Basu A, Hestness J, Hower DR, Krishna T, Sardashti S, Sen R, Sewell K, Shoaib M, Vaish N, Hill MD, Wood DA (2011) The gem5 simulator. SIGARCH Comput Archit News 39(2):1\u20137. \nhttps:\/\/doi.org\/10.1145\/2024716.2024718","journal-title":"SIGARCH Comput Archit News"},{"key":"2842_CR33","unstructured":"ARM Cortex-A72 MPCore Processor Technical Reference Manual. \nhttps:\/\/static.docs.arm.com\/100095\/0003\/cortex_a72_mpcore_trm_100095_0003_05_en.pdf?_ga=2.187644577.805846766.1551351186-1814310934.1538732624"},{"key":"2842_CR34","unstructured":"ARM Cortex-A53 MPCore Processor Technical Reference Manual. \nhttp:\/\/infocenter.arm.com\/help\/topic\/com.arm.doc.ddi0500d\/DDI0500D_cortex_a53_r0p2_trm.pdf"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-019-02842-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-019-02842-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-019-02842-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,6]],"date-time":"2020-04-06T23:21:58Z","timestamp":1586215318000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-019-02842-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,4,8]]},"references-count":34,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2020,3]]}},"alternative-id":["2842"],"URL":"https:\/\/doi.org\/10.1007\/s11227-019-02842-5","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2019,4,8]]},"assertion":[{"value":"8 April 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}