{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T00:16:36Z","timestamp":1760141796047,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031304446"},{"type":"electronic","value":"9783031304453"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-30445-3_8","type":"book-chapter","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T09:02:52Z","timestamp":1682499772000},"page":"89-100","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Algorithm and\u00a0Software Overhead: A Theoretical Approach to\u00a0Performance Portability"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2643-3483","authenticated-orcid":false,"given":"Valeria","family":"Mele","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0057-2573","authenticated-orcid":false,"given":"Giuliano","family":"Laccetti","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,4,27]]},"reference":[{"key":"8_CR1","doi-asserted-by":"publisher","first-page":"947","DOI":"10.1016\/j.future.2017.08.007","volume":"92","author":"SJ Pennycook","year":"2017","unstructured":"Pennycook, S.J., Sewall, J.D., Lee, V.W.: Implications of a metric for performance portability. Future Gener. Comput. Syst. 92, 947\u2013958 (2017). https:\/\/doi.org\/10.1016\/j.future.2017.08.007","journal-title":"Future Gener. Comput. Syst."},{"key":"8_CR2","unstructured":"Kwack, J., et al.: Evaluating performance portability of HPC applications and benchmarks across diverse HPC architectures. Exascale Computing Project (ECP) Webinar. https:\/\/www.exascaleproject.org\/event\/performance-portability-evaluation\/. Accessed 20 May 2020"},{"key":"8_CR3","unstructured":"DOE centres of excellence performance portability meeting: post-meeting report technical report LLNL-TR-700962. Lawrence Livermore National Laboratory, Livermore (2016). https:\/\/asc.llnl.gov\/sites\/asc\/files\/2020-09\/COE-PP-Meeting-2016-FinalReport_0.pdf"},{"issue":"4","key":"8_CR4","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.6008","volume":"33","author":"L Carracciuolo","year":"2021","unstructured":"Carracciuolo, L., Mele, V., Szustak, L.: About the granularity portability of block-based Krylov methods in heterogeneous computing environments. Concurr. Comput. Pract. Exp. 33(4), e6008 (2021). https:\/\/doi.org\/10.1002\/cpe.6008","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"8_CR5","doi-asserted-by":"publisher","unstructured":"Neely, J.R.: DOE centers of excellence performance portability meeting. Technical report LLNL-TR-700962, 4. Lawrence Livermore National Laboratory (2016). https:\/\/doi.org\/10.2172\/1332474","DOI":"10.2172\/1332474"},{"issue":"12","key":"8_CR6","doi-asserted-by":"publisher","first-page":"3202","DOI":"10.1016\/j.jpdc.2014.07.003","volume":"74","author":"HC Edwards","year":"2014","unstructured":"Edwards, H.C., Trott, C.R., Sunderland, D.: Kokkos: enabling manycore performance portability through polymorphic memory access patterns. J. Parallel Distrib. Comput. 74(12), 3202\u20133216 (2014). https:\/\/doi.org\/10.1016\/j.jpdc.2014.07.003","journal-title":"J. Parallel Distrib. Comput."},{"issue":"5","key":"8_CR7","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1109\/MCSE.2021.3097276","volume":"23","author":"J Pennycook","year":"2021","unstructured":"Pennycook, J., Sewall, J., Jacobsen, D.W., Deakin, T., McIntosh-Smith, S.N.: Navigating performance, portability and productivity. Comput. Sci. Eng. 23(5), 28\u201338 (2021). https:\/\/doi.org\/10.1109\/MCSE.2021.3097276","journal-title":"Comput. Sci. Eng."},{"key":"8_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"716","DOI":"10.1007\/978-3-030-10549-5_56","volume-title":"Euro-Par 2018: Parallel Processing Workshops","author":"V Mele","year":"2019","unstructured":"Mele, V., Romano, D., Constantinescu, E.M., Carracciuolo, L., D\u2019Amore, L.: Performance evaluation for a PETSc parallel-in-time solver based on the MGRIT algorithm. In: Mencagli, G., et al. (eds.) Euro-Par 2018. LNCS, vol. 11339, pp. 716\u2013728. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-10549-5_56"},{"key":"8_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1007\/978-3-319-32152-3_3","volume-title":"Parallel Processing and Applied Mathematics","author":"L D\u2019Amore","year":"2016","unstructured":"D\u2019Amore, L., Mele, V., Laccetti, G., Murli, A.: Mathematical approach to the performance evaluation of matrix multiply algorithm. In: Wyrzykowski, R., Deelman, E., Dongarra, J., Karczewski, K., Kitowski, J., Wiatr, K. (eds.) PPAM 2015. LNCS, vol. 9574, pp. 25\u201334. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-32152-3_3"},{"issue":"24","key":"8_CR10","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4928","volume":"30","author":"V Mele","year":"2018","unstructured":"Mele, V., Constantinescu, E.M., Carracciuolo, L., D\u2019amore, L.: A PETSc parallel-in-time solver based on MGRIT algorithm. Concurr. Comput. Pract. Exp. 30(24), e4928 (2018). https:\/\/doi.org\/10.1002\/cpe.4928","journal-title":"Concurr. Comput. Pract. Exp."},{"issue":"4","key":"8_CR11","doi-asserted-by":"publisher","first-page":"817","DOI":"10.31577\/cai_2019_4_817","volume":"38","author":"L D\u2019Amore","year":"2019","unstructured":"D\u2019Amore, L., Mel, V., Romano, D., Laccetti, G.: Multilevel algebraic approach for performance analysis of parallel algorithms. Comput. Inform. 38(4), 817\u2013850 (2019). https:\/\/doi.org\/10.31577\/cai_2019_4_817","journal-title":"Comput. Inform."},{"issue":"6","key":"8_CR12","doi-asserted-by":"publisher","first-page":"695","DOI":"10.1016\/j.future.2020.06.027","volume":"112","author":"D Romano","year":"2020","unstructured":"Romano, D., Lapegna, M., Mele, V., Laccetti, G.: Designing a GPU-parallel algorithm for raw SAR data compression: a focus on parallel performance estimation. Future Gener. Comput. Syst. 112(6), 695\u2013708 (2020). https:\/\/doi.org\/10.1016\/j.future.2020.06.027","journal-title":"Future Gener. Comput. Syst."},{"key":"8_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"704","DOI":"10.1007\/978-3-642-55224-3_66","volume-title":"Parallel Processing and Applied Mathematics","author":"G Laccetti","year":"2014","unstructured":"Laccetti, G., Lapegna, M., Mele, V., Romano, D.: A study on adaptive algorithms for numerical quadrature on heterogeneous GPU and multicore based systems. In: Wyrzykowski, R., Dongarra, J., Karczewski, K., Wa\u015bniewski, J. (eds.) PPAM 2013. LNCS, vol. 8384, pp. 704\u2013713. Springer, Heidelberg (2014). https:\/\/doi.org\/10.1007\/978-3-642-55224-3_66"},{"issue":"4","key":"8_CR14","doi-asserted-by":"publisher","first-page":"901","DOI":"10.1007\/s10766-015-0398-x","volume":"44","author":"G Laccetti","year":"2015","unstructured":"Laccetti, G., Lapegna, M., Mele, V.: A loosely coordinated model for heap-based priority queues in multicore environments. Int. J. Parallel Prog. 44(4), 901\u2013921 (2015). https:\/\/doi.org\/10.1007\/s10766-015-0398-x","journal-title":"Int. J. Parallel Prog."},{"issue":"19","key":"8_CR15","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.4945","volume":"31","author":"G Laccetti","year":"2019","unstructured":"Laccetti, G., Lapegna, M., Mele, V., Montella, R.: An adaptive algorithm for high-dimensional integrals on heterogeneous CPU-GPU systems. Concurr. Comput. Pract. Exp. 31(19), e4945 (2019). https:\/\/doi.org\/10.1002\/cpe.4945","journal-title":"Concurr. Comput. Pract. Exp."},{"issue":"1","key":"8_CR16","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/s10586-013-0341-0","volume":"17","author":"R Montella","year":"2014","unstructured":"Montella, R., Giunta, G., Laccetti, G.: Virtualizing high-end GPGPUs on ARM clusters for the next generation of high performance cloud computing. Cluster Comput. 17(1), 139\u2013152 (2014). https:\/\/doi.org\/10.1007\/s10586-013-0341-0","journal-title":"Cluster Comput."},{"key":"8_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1007\/978-3-319-78054-2_2","volume-title":"Parallel Processing and Applied Mathematics","author":"L Marcellino","year":"2018","unstructured":"Marcellino, L., et al.: Using GPGPU accelerated interpolation algorithms for marine bathymetry processing with on-premises and cloud based computational resources. In: Wyrzykowski, R., Dongarra, J., Deelman, E., Karczewski, K. (eds.) PPAM 2017. LNCS, vol. 10778, pp. 14\u201324. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-78054-2_2"},{"issue":"1","key":"8_CR18","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/s11075-012-9636-0","volume":"63","author":"L D\u2019Amore","year":"2013","unstructured":"D\u2019Amore, L., Campagna, R., Mele, V., Murli, A., Rizzardi, M.: ReLaTIve. An Ansi C90 software package for the Real Laplace Transform Inversion. Numerical Algorithms 63(1), 187\u2013211 (2013). https:\/\/doi.org\/10.1007\/s11075-012-9636-0","journal-title":"Numerical Algorithms"},{"key":"8_CR19","doi-asserted-by":"publisher","unstructured":"D\u2019Amore, L., Campagna, R., Mele, V., Murli, A.: Algorithm 946. ReLIADiff. An C++ software package for real Laplace transform inversion based on automatic differentiation. ACM Trans. Math. Softw. 40(4), 31:1\u201331:20 (2014). Article 31. https:\/\/doi.org\/10.1145\/2616971","DOI":"10.1145\/2616971"},{"issue":"4","key":"8_CR20","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1080\/17415977.2017.1322963","volume":"26","author":"L D\u2019Amore","year":"2018","unstructured":"D\u2019Amore, L., Mele, V., Campagna, R.: Quality assurance of Gaver\u2019s formula for multi-precision Laplace transform inversion in real case. Inverse Probl. Sci. Eng. 26(4), 553\u2013580 (2018). https:\/\/doi.org\/10.1080\/17415977.2017.1322963","journal-title":"Inverse Probl. Sci. Eng."},{"key":"8_CR21","doi-asserted-by":"publisher","unstructured":"Tjaden. G.S., Flynn. M.J.: Detection and parallel execution of independent instructions. IEEE Trans. Comput. C-19(10), 889\u2013895 (1970). https:\/\/doi.org\/10.1109\/T-C.1970.222795","DOI":"10.1109\/T-C.1970.222795"},{"issue":"1","key":"8_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0167-8191(89)90003-3","volume":"12","author":"HP Flatt","year":"1989","unstructured":"Flatt, H.P., Kennedy, K.: Performance of parallel processors. Parallel Comput. 12(1), 1\u201320 (1989). https:\/\/doi.org\/10.1016\/0167-8191(89)90003-3","journal-title":"Parallel Comput."},{"issue":"7","key":"8_CR23","doi-asserted-by":"publisher","first-page":"1485","DOI":"10.1016\/j.patcog.2008.10.026","volume":"42","author":"L Maddalena","year":"2009","unstructured":"Maddalena, L., Petrosino, A., Laccetti, G.: A fusion-based approach to digital movie restoration. Pattern Recogn. 42(7), 1485\u20131495 (2009). https:\/\/doi.org\/10.1016\/j.patcog.2008.10.026","journal-title":"Pattern Recogn."},{"key":"8_CR24","doi-asserted-by":"crossref","unstructured":"Hockney, R.W.: The Science of Computer Benchmarking. SIAM (1996)","DOI":"10.1137\/1.9780898719666"},{"key":"8_CR25","doi-asserted-by":"publisher","unstructured":"Ballard, G., Demmel, J., Knight, N.: Avoiding communication in successive band reduction. ACM Trans. Parallel Comput. 1(2), 37 (2015). Article 11. https:\/\/doi.org\/10.1145\/2686877","DOI":"10.1145\/2686877"},{"key":"8_CR26","doi-asserted-by":"publisher","unstructured":"Koanantakool, P., et al.: Communication-avoiding parallel sparse-dense matrix-matrix multiplication. In: IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 842\u2013853 (2016). https:\/\/doi.org\/10.1109\/IPDPS.2016.117","DOI":"10.1109\/IPDPS.2016.117"},{"key":"8_CR27","doi-asserted-by":"publisher","unstructured":"Sao, P., Kannan, R., Li, X.S., Vuduc, R.: A communication-avoiding 3D sparse triangular solver. In: Proceedings of the ACM International Conference on Supercomputing (ICS 2019), pp. 127\u2013137. Association for Computing Machinery, New York (2019). https:\/\/doi.org\/10.1145\/3330345.3330357","DOI":"10.1145\/3330345.3330357"},{"key":"8_CR28","doi-asserted-by":"publisher","unstructured":"Kennedy, K., McKinley, K.S.: Optimizing for parallelism and data locality. In: Proceedings of the 6th International Conference on Supercomputing (ICS 1992), pp. 323\u2013334. Association for Computing Machinery, New York (1992). https:\/\/doi.org\/10.1145\/143369.143427","DOI":"10.1145\/143369.143427"}],"container-title":["Lecture Notes in Computer Science","Parallel Processing and Applied Mathematics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-30445-3_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T04:44:19Z","timestamp":1760071459000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-30445-3_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031304446","9783031304453"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-30445-3_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"27 April 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PPAM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Parallel Processing and Applied Mathematics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Gdansk","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Poland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ppam2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ppam.edu.pl\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}