{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T20:39:00Z","timestamp":1757623140914,"version":"3.44.0"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,8,15]],"date-time":"2025-08-15T00:00:00Z","timestamp":1755216000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,15]],"date-time":"2025-08-15T00:00:00Z","timestamp":1755216000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012659","name":"Foundation for Innovative Research Groups of the National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272474, 61561146395","62272474, 61561146395","62272474, 61561146395","62272474, 61561146395"],"award-info":[{"award-number":["62272474, 61561146395","62272474, 61561146395","62272474, 61561146395","62272474, 61561146395"]}],"id":[{"id":"10.13039\/501100012659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07734-5","type":"journal-article","created":{"date-parts":[[2025,8,15]],"date-time":"2025-08-15T07:04:42Z","timestamp":1755241482000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Automatic GPU memory access optimization for AoSoA-based application in OP2 framework"],"prefix":"10.1007","volume":"81","author":[{"given":"Tong","family":"Lei","sequence":"first","affiliation":[]},{"given":"Zongjing","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yonggang","family":"Che","sequence":"additional","affiliation":[]},{"given":"Chuanfu","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Zhe","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,15]]},"reference":[{"issue":"10","key":"7734_CR1","doi-asserted-by":"publisher","first-page":"1236","DOI":"10.1631\/FITEE.1800494","volume":"19","author":"X-K Liao","year":"2018","unstructured":"Liao X-K, Kai L, Yang C-Q, Li J-W, Yuan Y, Lai M-C, Huang L-B, Ping-Jing L, Fang J-B, Ren J, Shen J (2018) Moving from exascale to zettascale computing: challenges and techniques. Front Inf Technol Elect Eng 19(10):1236\u20131244","journal-title":"Front Inf Technol Elect Eng"},{"issue":"4","key":"7734_CR2","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1007\/s42514-020-00039-4","volume":"2","author":"J Fang","year":"2020","unstructured":"Fang J, Huang C, Tang T, Wang Z (2020) Parallel programming models for heterogeneous many-cores: A comprehensive survey. CCF Trans High Perf Comput 2(4):382\u2013400","journal-title":"CCF Trans High Perf Comput"},{"key":"7734_CR3","doi-asserted-by":"crossref","unstructured":"Mudalige GR, Giles MB, Reguly I, Bertolli C, Kelly PHJ (2012) Op2: An active library framework for solving unstructured mesh-based applications on multi-core and many-core architectures. In: 2012 Innovative Parallel Computing (InPar), pp 1\u201312","DOI":"10.1109\/InPar.2012.6339594"},{"issue":"3","key":"7734_CR4","first-page":"330","volume":"6","author":"C Zongjing","year":"2023","unstructured":"Zongjing C, Kangjin H, Che Yonggang X, Chuanfu ZJ, Zhe D, Ming L (2023) Extending op2 framework to support portable parallel programming of complex applications. CCF Trans High Perf Computing 6(3):330\u2013342","journal-title":"CCF Trans High Perf Computing"},{"key":"7734_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2023.104831","volume":"187","author":"D Zhe","year":"2024","unstructured":"Zhe D, Liang D, YongGang C, Ming L, Jian Z, Yueqing W (2024) Evaluating performance portability of five shared-memory programming models using a high-order unstructured cfd solver. J Parall Distrib Computing 187:104831","journal-title":"J Parall Distrib Computing"},{"key":"7734_CR6","doi-asserted-by":"crossref","unstructured":"Dai Zhe, Deng Liang, Wang Yueqing, Wang Fang, Li Ming, Zhang Jian (2022) Performance optimization and analysis of the unstructured discontinuous galerkin solver on multi-core and many-core architectures. In: 2022 IEEE 24th Int Conf on High Performance Computing & Communications","DOI":"10.1109\/HPCC-DSS-SmartCity-DependSys57074.2022.00158"},{"issue":"4","key":"7734_CR7","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1177\/109434200201600403","volume":"16","author":"J Mavriplis Dimitri","year":"2002","unstructured":"Mavriplis Dimitri J (2002) Parallel performance investigations of an unstructured mesh navier-stokes solver. Int J High Perform Comput Appl 16(4):395\u2013407","journal-title":"Int J High Perform Comput Appl"},{"issue":"3","key":"7734_CR8","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1002\/cpe.3022","volume":"26","author":"Z Nagy","year":"2014","unstructured":"Nagy Z, Nemes C, Hiba A, Cs\u00edk \u00c1, Kiss A, Ruszink\u00f3 M, Szolgay P (2014) Accelerating unstructured finite volume computations on field-programmable gate arrays. Concurr Computation Pract Exp 26(3):615\u2013643","journal-title":"Concurr Computation Pract Exp"},{"key":"7734_CR9","unstructured":"Balay Satish, McInnes Lois\u00a0Curfman, Zhang Hong (2018) PETSc Web page"},{"key":"7734_CR10","unstructured":"Kevin R (2003) Long, Raymond Stephen Tuminaro. Robert John Hoekstra, and Ph. An overview of Trilinos, Roscoe Ainsworth Bartlett"},{"key":"7734_CR11","doi-asserted-by":"crossref","unstructured":"DeVito Zachary, Joubert Niels, Palacios Francisco (2011) Liszt: A domain specific language for building portable mesh-based pde solvers. In: SC 2011","DOI":"10.1145\/2063384.2063396"},{"key":"7734_CR12","doi-asserted-by":"crossref","unstructured":"Bertolli Carlo, Betts Adam, Mudalige Gihan, Giles Mike, Kelly Paul (2011) Design and performance of the op2 library for unstructured mesh applications. In: Euro-Par 2011: Parallel Processing Workshops","DOI":"10.1007\/978-3-642-29737-3_22"},{"issue":"11","key":"7734_CR13","doi-asserted-by":"publisher","first-page":"1451","DOI":"10.1016\/j.jpdc.2012.07.008","volume":"73","author":"MB Giles","year":"2013","unstructured":"Giles MB, Mudalige GR, Spencer B, Bertolli C, Reguly I (2013) Designing op2 for gpu architectures. J Parallel Distrib Comput 73(11):1451\u20131460","journal-title":"J Parallel Distrib Comput"},{"key":"7734_CR14","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1016\/j.jpdc.2019.07.011","volume":"13","author":"AA Sulyok","year":"2019","unstructured":"Sulyok AA, Balogh GD, Reguly IZ, Mudalige GR (2019) Locality optimized unstructured mesh algorithms on gpus. J Parallel Distrib Comput 13:50\u201364","journal-title":"J Parallel Distrib Comput"},{"key":"7734_CR15","doi-asserted-by":"crossref","unstructured":"Reguly IZ, L\u00e1szl\u00f3 Endre, Mudalige Gihan\u00a0R, Giles Mike\u00a0B (2016) Vectorizing unstructured mesh computations for many-core architectures. Concurrency and Computation: Practice and Experience, pages 557\u2013577","DOI":"10.1002\/cpe.3621"},{"key":"7734_CR16","doi-asserted-by":"crossref","unstructured":"Mudalige GR, Reguly IZ, Giles MB (2016) Auto-vectorizing a large-scale production unstructured-mesh cfd application. In: WPMVP \u201916: Proceedings of the 3rd Workshop on Programming Models for SIMD\/Vector Processing","DOI":"10.1145\/2870650.2870651"},{"issue":"2","key":"7734_CR17","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1145\/2381056.2381072","volume":"40","author":"GR Mudalige","year":"2012","unstructured":"Mudalige GR, Giles MB, Bertolli C, Kelly PHJ (2012) Predictive modeling and analysis of op2 on distributed memory gpu clusters. ACM SIGMETRICS Perf Eval Rev 40(2):61\u201367","journal-title":"ACM SIGMETRICS Perf Eval Rev"},{"issue":"11","key":"7734_CR18","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1016\/j.parco.2013.09.004","volume":"39","author":"GR Mudalige","year":"2013","unstructured":"Mudalige GR, Giles MB, Thiyagalingam J, Reguly IZ, Bertolli C, Kelly PHJ, Trefethen AE (2013) Design and initial performance of a high-level unstructured mesh framework on heterogeneous parallel systems. Parallel Comput 39(11):669\u2013692","journal-title":"Parallel Comput"},{"key":"7734_CR19","doi-asserted-by":"crossref","unstructured":"Ekanayake Suneth\u00a0Dasantha, Reguly Istv\u00e1n\u00a0Zolt\u00e1n, Luporini Fabio, Mudalige Gihan\u00a0Ravideva (2023) Communication-avoiding optimizations for large-scale unstructured-mesh applications with op2. In: ICPP \u201923: Proceedings of the 52nd international conference on parallel processing","DOI":"10.1145\/3605573.3605604"},{"key":"7734_CR20","unstructured":"Giles Gihan R\u00a0Mudalige (2012) Mike and Istv\u00e1n\u00a0Zolt\u00e1n Reguly. OP 2 Airfoil Example"},{"key":"7734_CR21","unstructured":"Reguly IZ, Mudalige GR, Bertolli C (2014) Rolls-royce hydra cfd code on gpus using op2 abstraction. In: GPU technology conference (GTC) 2014"},{"issue":"5","key":"7734_CR22","doi-asserted-by":"publisher","first-page":"1265","DOI":"10.1109\/TPDS.2015.2453972","volume":"27","author":"IZ Reguly","year":"2016","unstructured":"Reguly IZ, Mudalige GR, Bertolli C, Giles MB, Betts A, Phj K, Radford D (2016) Acceleration of a full-scale industrial cfd application with op2. IEEE Trans Parallel Distrib Syst 27(5):1265\u20131278","journal-title":"IEEE Trans Parallel Distrib Syst"},{"issue":"11","key":"7734_CR23","first-page":"1","volume":"11","author":"Z Reguly Istvan","year":"2018","unstructured":"Reguly Istvan Z, Gopinathan D, Beck JH, Giles MB, Guillas S, Dias F (2018) The volna-op2 tsunami code (version 10). Geosci Model Dev Discuss 11(11):1\u201318","journal-title":"Geosci Model Dev Discuss"},{"key":"7734_CR24","doi-asserted-by":"crossref","unstructured":"Reguly Istv\u00e1n\u00a0Z, Mudalige Gihan\u00a0R (2020) Modernising an industrial cfd application. In: 2020 Eighth international symposium on computing and networking workshops (CANDARW)","DOI":"10.1109\/CANDARW51189.2020.00046"},{"issue":"5","key":"7734_CR25","first-page":"1","volume":"80","author":"H Kangjin","year":"2023","unstructured":"Kangjin H, Che Yonggang X, Chuanfu DZ, Jian Z (2023) Improving cuda performance of an unstructured high-order cfd application under op2 framework. J Supercomput 80(5):1\u201315","journal-title":"J Supercomput"},{"key":"7734_CR26","doi-asserted-by":"crossref","unstructured":"Shen Xipen, Gao Yaoqing, Ding Chen, Archambault Roch (2005) Lightweight reference affinity analysis. In: proceedings of the 19th annual international conference on supercomputing, ICS \u201905, page 131\u2013140","DOI":"10.1145\/1088149.1088167"},{"key":"7734_CR27","doi-asserted-by":"crossref","unstructured":"Zhou Cyrus, Hassman Zack, Shah Dhirpal, Richard Vaughn, Li Yanjing (2024) Yflows: Systematic dataflow exploration and code generation for efficient neural network inference using simd architectures on cpus. In: Proceedings of the 33rd ACM SIGPLAN International Conference on Compiler Construction, CC 2024, page 212\u2013226","DOI":"10.1145\/3640537.3641566"},{"key":"7734_CR28","doi-asserted-by":"crossref","unstructured":"Radtke Pawel\u00a0K, Weinzierl Tobias (2025) Annotation-guided aos-to-soa conversions and gpu offloading with data views in c++","DOI":"10.1002\/cpe.70199"},{"issue":"1","key":"7734_CR29","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1002\/spe.3077","volume":"53","author":"GB Manfred","year":"2023","unstructured":"Manfred GB, Guilherme A, Jakob B, Alexander M, Ren\u00e9 W, Michael B (2023) Llama: the low-level abstraction for memory access. Softw Pract Exp 53(1):115\u2013141","journal-title":"Softw Pract Exp"},{"key":"7734_CR30","unstructured":"Hygon website. https:\/\/www.hygon.cn\/index, (2023)"},{"issue":"2","key":"7734_CR31","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1007\/s42514-023-00153-z","volume":"6","author":"L Yani","year":"2024","unstructured":"Yani L, Feng Z, Zaifeng P, Guo Xiaoguang H, Yihua ZX, Xiaoyong D (2024) Compressed data direct computing for chinese dataset on dcu. CCF Trans High Perform Computing 6(2):206\u2013220","journal-title":"CCF Trans High Perform Computing"},{"key":"7734_CR32","unstructured":"NVIDIA Visual Profiler website. https:\/\/developer.nvidia.com\/nvidia-visual-profiler"},{"key":"7734_CR33","unstructured":"Op2 gpu optimization github repository. https:\/\/github.com\/zongjing1998\/OP2-GPU-optimization"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07734-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07734-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07734-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T03:07:10Z","timestamp":1757387230000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07734-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,15]]},"references-count":33,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2025,8]]}},"alternative-id":["7734"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07734-5","relation":{},"ISSN":["1573-0484"],"issn-type":[{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2025,8,15]]},"assertion":[{"value":"30 July 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 August 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"1238"}}