{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T09:16:39Z","timestamp":1770714999809,"version":"3.49.0"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s42514-024-00212-z","type":"journal-article","created":{"date-parts":[[2025,2,14]],"date-time":"2025-02-14T19:17:26Z","timestamp":1739560646000},"page":"1-16","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["An empirical performance evaluation of SYCL on ARM multi-core processors"],"prefix":"10.1007","volume":"7","author":[{"given":"Hanzheng","family":"Liang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chencheng","family":"Deng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianbin","family":"Fang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chun","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,14]]},"reference":[{"key":"212_CR1","doi-asserted-by":"publisher","unstructured":"Alpay, A., Heuveline, V.: One pass to bind them: The first single-pass sycl compiler with unified code representation across backends. In: Proceedings of the 2023 International Workshop on OpenCL. IWOCL \u201923. Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3585341.3585351","DOI":"10.1145\/3585341.3585351"},{"key":"212_CR2","doi-asserted-by":"publisher","unstructured":"Alpay, A., Soproni, B., W\u00fcnsche, H., Heuveline, V.: Exploring the possibility of a hipsycl-based implementation of oneapi. In: Proceedings of the 10th International Workshop on OpenCL. IWOCL \u201922. Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3529538.3530005","DOI":"10.1145\/3529538.3530005"},{"key":"212_CR3","doi-asserted-by":"publisher","unstructured":"Arima, E., Kodama, Y., Odajima, T., Tsuji, M., Sato, M.: Power\/performance\/area evaluations for next-generation hpc processors using the a64fx chip. In: 2021 IEEE Symposium in Low-Power and High-Speed Chips (COOL CHIPS), pp. 1\u20136 (2021). https:\/\/doi.org\/10.1109\/COOLCHIPS52128.2021.9410320","DOI":"10.1109\/COOLCHIPS52128.2021.9410320"},{"key":"212_CR4","doi-asserted-by":"publisher","unstructured":"Beckingsale, D.A., Burmark, J., Hornung, R., Jones, H., Killian, W., Kunen, A.J., Pearce, O., Robinson, P., Ryujin, B.S., Scogland, T.R.: Raja: Portable performance for large-scale scientific applications. In: 2019 IEEE\/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC), pp. 71\u201381 (2019). https:\/\/doi.org\/10.1109\/P3HPC49587.2019.00012","DOI":"10.1109\/P3HPC49587.2019.00012"},{"key":"212_CR5","doi-asserted-by":"publisher","unstructured":"Breyer, M., Van\u00a0Craen, A., Pfl\u00fcger, D.: A comparison of sycl, opencl, cuda, and openmp for massively parallel support vector machine classification on multi-vendor hardware. In: Proceedings of the 10th International Workshop on OpenCL. IWOCL \u201922. Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3529538.3529980","DOI":"10.1145\/3529538.3529980"},{"key":"212_CR6","doi-asserted-by":"publisher","unstructured":"Deakin, T., McIntosh-Smith, S., Alpay, A., Heuveline, V.: Benchmarking and extending sycl hierarchical parallelism. In: 2021 IEEE\/ACM International Workshop on Hierarchical Parallelism for Exascale Computing (HiPar), pp. 10\u201319 (2021). https:\/\/doi.org\/10.1109\/HiPar54615.2021.00007","DOI":"10.1109\/HiPar54615.2021.00007"},{"key":"212_CR7","doi-asserted-by":"publisher","unstructured":"Edwards, H.C., Trott, C.R.: Kokkos: Enabling performance portability across manycore architectures. In: 2013 Extreme Scaling Workshop (xsw 2013), pp. 18\u201324 (2013). https:\/\/doi.org\/10.1109\/XSW.2013.7","DOI":"10.1109\/XSW.2013.7"},{"issue":"1","key":"212_CR8","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/S11390-020-0741-6","volume":"36","author":"J Fang","year":"2021","unstructured":"Fang, J., Liao, X., Huang, C., Dong, D.: Performance evaluation of memory-centric armv8 many-core architectures: a case study with phytium 2000+. J. Comput. Sci. Technol. 36(1), 33\u201343 (2021). https:\/\/doi.org\/10.1007\/S11390-020-0741-6","journal-title":"J. Comput. Sci. Technol."},{"issue":"6","key":"212_CR9","doi-asserted-by":"publisher","first-page":"1323","DOI":"10.1007\/S11390-021-1251-X","volume":"38","author":"W Gao","year":"2023","unstructured":"Gao, W., Fang, J., Huang, C., Xu, C., Wang, Z.: Wrbench: comparing cache architectures and coherency protocols on armv8 many-core systems. J. Comput. Sci. Technol. 38(6), 1323\u20131338 (2023). https:\/\/doi.org\/10.1007\/S11390-021-1251-X","journal-title":"J. Comput. Sci. Technol."},{"key":"212_CR10","doi-asserted-by":"publisher","unstructured":"Ghiglio, P., Dolinsky, U., Goli, M., Narasimhan, K.: Improving performance of sycl applications on cpu architectures using llvm-directed compilation flow. In: Proceedings of the Thirteenth International Workshop on Programming Models and Applications for Multicores and Manycores. PMAM \u201922, pp. 1\u201310. Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3528425.3529099","DOI":"10.1145\/3528425.3529099"},{"key":"212_CR11","doi-asserted-by":"publisher","DOI":"10.1002\/CPE.7810","author":"P Ghiglio","year":"2023","unstructured":"Ghiglio, P., Dolinsky, U., Goli, M., Narasimhan, K.: Improving performance of SYCL applications on CPU architectures using llvm-directed compilation flow. Concurr. Comput. Pract. Exp. (2023). https:\/\/doi.org\/10.1002\/CPE.7810","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"212_CR12","unstructured":"Group, K.: SYCL$$^{{\\rm TM}}$$ 2020 Specification (revision 8) (2024). https:\/\/registry.khronos.org\/SYCL\/specs\/sycl-2020\/html\/sycl-2020.html"},{"key":"212_CR13","doi-asserted-by":"publisher","unstructured":"Ivanov, I.R., Zinenko, O., Domke, J., Endo, T., Moses, W.S.: Retargeting and respecializing gpu workloads for performance portability. In: 2024 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO), pp. 119\u2013132 (2024). https:\/\/doi.org\/10.1109\/CGO57630.2024.10444828","DOI":"10.1109\/CGO57630.2024.10444828"},{"key":"212_CR14","unstructured":"J\u00e4\u00e4skel\u00e4inen, P., La\u00a0Lama, C.S., Schnetter, E., Raiskila, K., Takala, J., Berg, H.: pocl: a performance-portable opencl implementation. CoRR abs\/1611.07083 (2016) arXiv:1611.07083"},{"key":"212_CR15","doi-asserted-by":"publisher","unstructured":"Jin, Z., Vetter, J.: Evaluating cuda portability with hipcl and dpct. In: 2021 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 371\u2013376 (2021). https:\/\/doi.org\/10.1109\/IPDPSW52791.2021.00065","DOI":"10.1109\/IPDPSW52791.2021.00065"},{"key":"212_CR16","doi-asserted-by":"publisher","unstructured":"Jin, Z., Vetter, J.S.: A benchmark suite for improving performance portability of the sycl programming model. In: 2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pp. 325\u2013327 (2023). https:\/\/doi.org\/10.1109\/ISPASS57527.2023.00041","DOI":"10.1109\/ISPASS57527.2023.00041"},{"key":"212_CR17","doi-asserted-by":"publisher","unstructured":"Johnston, B., Vetter, J.S., Milthorpe, J.: Evaluating the performance and portability of contemporary sycl implementations. In: 2020 IEEE\/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC), pp. 45\u201356 (2020). https:\/\/doi.org\/10.1109\/P3HPC51967.2020.00010","DOI":"10.1109\/P3HPC51967.2020.00010"},{"key":"212_CR18","doi-asserted-by":"publisher","unstructured":"Kaneko, S., Takizawa, H., Sano, K.: A sycl-based high-level programming framework for hpc programmers to use remote fpga clusters. In: Proceedings of the 12th International Symposium on Highly-Efficient Accelerators and Reconfigurable Technologies. HEART \u201922, pp. 92\u201394. Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3535044.3535058","DOI":"10.1145\/3535044.3535058"},{"key":"212_CR19","doi-asserted-by":"publisher","unstructured":"Ke, Y., Agung, M., Takizawa, H.: neosycl: a sycl implementation for sx-aurora tsubasa. In: The International Conference on High Performance Computing in Asia-Pacific Region. HPCAsia \u201921, pp. 50\u201357. Association for Computing Machinery, New York, NY, USA (2021). https:\/\/doi.org\/10.1145\/3432261.3432268","DOI":"10.1145\/3432261.3432268"},{"key":"212_CR20","doi-asserted-by":"publisher","unstructured":"Kim, J., Dao, T.T., Jung, J., Joo, J., Lee, J.: Bridging opencl and cuda: a comparative analysis and translation. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. SC \u201915. Association for Computing Machinery, New York, NY, USA (2015). https:\/\/doi.org\/10.1145\/2807591.2807621","DOI":"10.1145\/2807591.2807621"},{"key":"212_CR21","doi-asserted-by":"publisher","first-page":"629","DOI":"10.1007\/978-3-030-57675-2_39","volume-title":"Euro-Par 2020: Parallel Processing","author":"S Lal","year":"2020","unstructured":"Lal, S., Alpay, A., Salzmann, P., Cosenza, B., Hirsch, A., Stawinoga, N., Thoman, P., Fahringer, T., Heuveline, V.: Sycl-bench: a versatile cross-platform benchmark suite for heterogeneous computing. In: Malawski, M., Rzadca, K. (eds.) Euro-Par 2020: Parallel Processing, pp. 629\u2013644. Springer, Cham (2020)"},{"key":"212_CR22","doi-asserted-by":"publisher","unstructured":"Lattner, C., Amini, M., Bondhugula, U., Cohen, A., Davis, A., Pienaar, J., Riddle, R., Shpeisman, T., Vasilache, N., Zinenko, O.: Mlir: Scaling compiler infrastructure for domain specific computation. In: 2021 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO), pp. 2\u201314 (2021). https:\/\/doi.org\/10.1109\/CGO51591.2021.9370308","DOI":"10.1109\/CGO51591.2021.9370308"},{"key":"212_CR23","doi-asserted-by":"publisher","unstructured":"Lin, W.-C., Deakin, T., McIntosh-Smith, S.: On measuring the maturity of sycl implementations by tracking historical performance improvements. In: Proceedings of the 9th International Workshop on OpenCL. IWOCL \u201921, pp. 1\u201313. Association for Computing Machinery, New York, NY, USA (2021). https:\/\/doi.org\/10.1145\/3456669.3456701","DOI":"10.1145\/3456669.3456701"},{"key":"212_CR24","doi-asserted-by":"publisher","unstructured":"Memeti, S., Li, L., Pllana, S., Ko\u0142odziej, J., Kessler, C.: Benchmarking opencl, openacc, openmp, and cuda: Programming productivity, performance, and energy consumption. In: Proceedings of the 2017 Workshop on Adaptive Resource Management and Scheduling for Cloud Computing. ARMS-CC \u201917, pp. 1\u20136. Association for Computing Machinery, New York, NY, USA (2017). https:\/\/doi.org\/10.1145\/3110355.3110356","DOI":"10.1145\/3110355.3110356"},{"key":"212_CR25","doi-asserted-by":"publisher","unstructured":"Meyer, J., Alpay, A., Hack, S., Fr\u00f6ning, H., Heuveline, V.: Implementation techniques for spmd kernels on cpus. In: Proceedings of the 2023 International Workshop on OpenCL. IWOCL \u201923. Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3585341.3585342","DOI":"10.1145\/3585341.3585342"},{"key":"212_CR26","doi-asserted-by":"publisher","unstructured":"Moses, W.S., Chelini, L., Zhao, R., Zinenko, O.: Polygeist: Raising c to polyhedral mlir. In: 2021 30th International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 45\u201359 (2021). https:\/\/doi.org\/10.1109\/PACT52795.2021.00011","DOI":"10.1109\/PACT52795.2021.00011"},{"key":"212_CR27","doi-asserted-by":"publisher","unstructured":"Moses, W.S., Ivanov, I.R., Domke, J., Endo, T., Doerfert, J., Zinenko, O.: High-performance gpu-to-cpu transpilation and optimization via high-level parallel constructs. In: Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming. PPoPP \u201923, pp. 119\u2013134. Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3572848.3577475","DOI":"10.1145\/3572848.3577475"},{"key":"212_CR28","doi-asserted-by":"publisher","unstructured":"Pedretti, K., Younge, A.J., Hammond, S.D., Laros\u00a0III, J.H., Curry, M.L., Aguilar, M.J., Hoekstra, R.J., Brightwell, R.: Chronicles of astra: Challenges and lessons from the first petascale arm supercomputer. In: SC20: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201314 (2020). https:\/\/doi.org\/10.1109\/SC41405.2020.00052","DOI":"10.1109\/SC41405.2020.00052"},{"key":"212_CR29","doi-asserted-by":"publisher","first-page":"947","DOI":"10.1016\/j.future.2017.08.007","volume":"92","author":"SJ Pennycook","year":"2019","unstructured":"Pennycook, S.J., Sewall, J.D., Lee, V.W.: Implications of a metric for performance portability. Futur. Gener. Comput. Syst. 92, 947\u2013958 (2019). https:\/\/doi.org\/10.1016\/j.future.2017.08.007","journal-title":"Futur. Gener. Comput. Syst."},{"key":"212_CR30","doi-asserted-by":"publisher","DOI":"10.1145\/3571284","author":"V P\u00e9rez","year":"2023","unstructured":"P\u00e9rez, V., Sommer, L., Lom\u00fcller, V., Narasimhan, K., Goli, M.: User-driven online kernel fusion for sycl. ACM Trans. Archit. Code Optim. (2023). https:\/\/doi.org\/10.1145\/3571284","journal-title":"ACM Trans. Archit. Code Optim."},{"key":"212_CR31","doi-asserted-by":"publisher","unstructured":"Ragan-Kelley, J., Barnes, C., Adams, A., Paris, S., Durand, F., Amarasinghe, S.: Halide: a language and compiler for optimizing parallelism, locality, and recomputation in image processing pipelines. In: Proceedings of the 34th ACM SIGPLAN Conference on Programming Language Design and Implementation. PLDI \u201913, pp. 519\u2013530. Association for Computing Machinery, New York, NY, USA (2013). https:\/\/doi.org\/10.1145\/2491956.2462176","DOI":"10.1145\/2491956.2462176"},{"key":"212_CR32","doi-asserted-by":"publisher","unstructured":"Rangel, E.M., Pennycook, S.J., Pope, A., Frontiere, N., Ma, Z., Madananth, V.: A performance-portable sycl implementation of crk-hacc for exascale. In: Proceedings of the SC \u201923 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis. SC-W \u201923, pp. 1114\u20131125. Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3624062.3624187","DOI":"10.1145\/3624062.3624187"},{"key":"212_CR33","doi-asserted-by":"publisher","unstructured":"Reguly, I.Z.: Evaluating the performance portability of sycl across cpus and gpus on bandwidth-bound applications. In: Proceedings of the SC \u201923 Workshops of The International Conference on High Performance Computing, Network, Storage, and Analysis. SC-W \u201923, pp. 1038\u20131047. Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3624062.3624180","DOI":"10.1145\/3624062.3624180"},{"key":"212_CR34","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1007\/978-3-031-32041-5_18","volume-title":"High Performance Computing","author":"I Sakiotis","year":"2023","unstructured":"Sakiotis, I., Arumugam, K., Paterno, M., Ranjan, D., Terzi\u0107, B., Zubair, M.: Porting numerical integration codes from cuda to oneapi: a case study. In: Bhatele, A., Hammond, J., Baboulin, M., Kruse, C. (eds.) High Performance Computing, pp. 339\u2013358. Springer, Cham (2023)"},{"key":"212_CR35","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1007\/978-3-031-40843-4_45","volume-title":"High Performance Computing","author":"WR Shilpage","year":"2023","unstructured":"Shilpage, W.R., Wright, S.A.: An investigation into the performance and portability of sycl compiler implementations. In: Bienz, A., Weiland, M., Baboulin, M., Kruse, C. (eds.) High Performance Computing, pp. 605\u2013619. Springer, Cham (2023)"},{"key":"212_CR36","doi-asserted-by":"publisher","unstructured":"Silva, H.C., Pisani, F., Borin, E.: A comparative study of sycl, opencl, and openmp. In: 2016 International Symposium on Computer Architecture and High Performance Computing Workshops (SBAC-PADW), pp. 61\u201366 (2016). https:\/\/doi.org\/10.1109\/SBAC-PADW.2016.19","DOI":"10.1109\/SBAC-PADW.2016.19"},{"key":"212_CR37","doi-asserted-by":"publisher","unstructured":"Thoman, P., Molina\u00a0Heredia, F., Fahringer, T.: On the compilation performance of current sycl implementations. In: Proceedings of the 10th International Workshop on OpenCL. IWOCL \u201922. Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3529538.3529548","DOI":"10.1145\/3529538.3529548"},{"key":"212_CR38","doi-asserted-by":"publisher","unstructured":"Tiotto, E., P\u00e9rez, V., Tsang, W., Sommer, L., Oppermann, J., Lom\u00fcller, V., Goli, M., Brodman, J.: Experiences building an mlir-based sycl compiler. In: 2024 IEEE\/ACM International Symposium on Code Generation and Optimization (CGO), pp. 399\u2013410 (2024). https:\/\/doi.org\/10.1109\/CGO57630.2024.10444866","DOI":"10.1109\/CGO57630.2024.10444866"},{"key":"212_CR39","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1007\/978-3-642-32820-6_85","volume-title":"Euro-Par 2012 Parallel Processing","author":"S Wienke","year":"2012","unstructured":"Wienke, S., Springer, P., Terboven, C., Mey, D.: Openacc \u2013 first experiences with real-world applications. In: Kaklamanis, C., Papatheodorou, T., Spirakis, P.G. (eds.) Euro-Par 2012 Parallel Processing, pp. 859\u2013870. Springer, Berlin, Heidelberg (2012)"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-024-00212-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-024-00212-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-024-00212-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,24]],"date-time":"2025-02-24T10:50:35Z","timestamp":1740394235000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-024-00212-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2]]},"references-count":39,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["212"],"URL":"https:\/\/doi.org\/10.1007\/s42514-024-00212-z","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"value":"2524-4922","type":"print"},{"value":"2524-4930","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2]]},"assertion":[{"value":"30 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}