{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,4]],"date-time":"2025-10-04T00:38:32Z","timestamp":1759538312389,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":15,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031901997"},{"type":"electronic","value":"9783031902000"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-90200-0_35","type":"book-chapter","created":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T12:42:37Z","timestamp":1749559357000},"page":"438-449","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Element Scheduling for\u00a0GPU-Accelerated Finite-Volumes Computations"],"prefix":"10.1007","author":[{"given":"Franco","family":"Seveso","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ernesto","family":"Dufrechou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pablo","family":"Ezzatti","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gabriel","family":"Usera","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,11]]},"reference":[{"key":"35_CR1","doi-asserted-by":"publisher","unstructured":"de\u00a0Castro, M., Santamaria-Valenzuela, I., Torres, Y., Gonzalez-Escribano, A., Llanos, D.R.: Epsilod: efficient parallel skeleton for generic iterative stencil computations in distributed GPUs. J. Supercomput. 79(9), 9409\u20139442 (2023). https:\/\/doi.org\/10.1007\/s11227-022-05040-y","DOI":"10.1007\/s11227-022-05040-y"},{"key":"35_CR2","unstructured":"Deserno, F., Hager, G., Brechtefeld, F., Wellein, G.: Basic optimization strategies for CFD-codes. Regionales Rechenzentrum Erlangen, Technical report (2002)"},{"key":"35_CR3","doi-asserted-by":"publisher","unstructured":"Dufrechou, E., Ezzatti, P., Usera, G.: Avoiding synchronization to accelerate a CFD solver in GPU. In: 2019 31st International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD), pp. 204\u2013211 (2019). https:\/\/doi.org\/10.1109\/SBAC-PAD.2019.00041","DOI":"10.1109\/SBAC-PAD.2019.00041"},{"key":"35_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"530","DOI":"10.1007\/978-3-642-31128-4_39","volume-title":"Computational Science and Its Applications \u2013 ICCSA 2012","author":"P Igounet","year":"2012","unstructured":"Igounet, P., Alfaro, P., Usera, G., Ezzatti, P.: GPU acceleration of the caffa3d.MB model. In: Murgante, B., et al. (eds.) ICCSA 2012. LNCS, vol. 7336, pp. 530\u2013542. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-31128-4_39"},{"key":"35_CR5","unstructured":"Maruyama, N., Aoki, T.: Optimizing stencil computations for Nvidia Kepler GPUs (2014). https:\/\/api.semanticscholar.org\/CorpusID:17605545"},{"key":"35_CR6","doi-asserted-by":"publisher","unstructured":"Matsumura, K., Zohouri, H.R., Wahib, M., Endo, T., Matsuoka, S.: AN5D: automated stencil framework for high-degree temporal blocking on GPUs. In: Proceedings of the 18th ACM\/IEEE International Symposium on Code Generation and Optimization, CGO 2020, pp. 199\u2013211. Association for Computing Machinery, New York, NY, USA (2020). https:\/\/doi.org\/10.1145\/3368826.3377904","DOI":"10.1145\/3368826.3377904"},{"key":"35_CR7","unstructured":"NVIDIA: Kepler TM GK110 the fastest, most efficient HPC architecture ever built (2012). Accessed 10 June 2017"},{"key":"35_CR8","doi-asserted-by":"publisher","unstructured":"Rawat, P.S., Rastello, F., Sukumaran-Rajam, A., Pouchet, L.N., Rountev, A., Sadayappan, P.: Register optimizations for stencils on GPUs. In: Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2018, pp. 168\u2013182. Association for Computing Machinery, New York, NY, USA (2018). https:\/\/doi.org\/10.1145\/3178487.3178500","DOI":"10.1145\/3178487.3178500"},{"issue":"11","key":"35_CR9","doi-asserted-by":"publisher","first-page":"1902","DOI":"10.1109\/JPROC.2018.2862896","volume":"106","author":"PS Rawat","year":"2018","unstructured":"Rawat, P.S., et al.: Domain-specific optimization and generation of high-performance GPU code for stencil computations. Proc. IEEE 106(11), 1902\u20131920 (2018). https:\/\/doi.org\/10.1109\/JPROC.2018.2862896","journal-title":"Proc. IEEE"},{"key":"35_CR10","doi-asserted-by":"publisher","unstructured":"Rawat, P.S., Vaidya, M., Sukumaran-Rajam, A., Rountev, A., Pouchet, L.N., Sadayappan, P.: On optimizing complex stencils on GPUs. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 641\u2013652 (2019). https:\/\/doi.org\/10.1109\/IPDPS.2019.00073","DOI":"10.1109\/IPDPS.2019.00073"},{"key":"35_CR11","doi-asserted-by":"publisher","unstructured":"Sai, R., Mellor-Crummey, J., Meng, X., Araya-Polo, M., Meng, J.: Using the semi-stencil algorithm to accelerate high-order stencils on GPUs. In: 2021 International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS), pp. 63\u201368 (2021). https:\/\/doi.org\/10.1109\/PMBS54543.2021.00012","DOI":"10.1109\/PMBS54543.2021.00012"},{"key":"35_CR12","doi-asserted-by":"crossref","unstructured":"Stone, H.L.: Iterative solution of implicit approximations of multidimensional partial differential equations. SIAM J. Numer. Anal. 5(3), 530\u2013558 (1968). http:\/\/www.jstor.org\/stable\/2949703","DOI":"10.1137\/0705044"},{"issue":"3","key":"35_CR13","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1007\/s10494-008-9153-3","volume":"81","author":"G Usera","year":"2008","unstructured":"Usera, G., Vernet, A., Ferr\u00e9, J.: A parallel block-structured finite volume method for flows in complex geometry with sliding interfaces. Flow Turbul. Combust. 81(3), 471\u2013495 (2008). https:\/\/doi.org\/10.1007\/s10494-008-9153-3","journal-title":"Flow Turbul. Combust."},{"key":"35_CR14","doi-asserted-by":"publisher","unstructured":"Zhang, L., et al.: Revisiting temporal blocking stencil optimizations. In: Proceedings of the 37th International Conference on Supercomputing, ICS 2023, pp. 251\u2013263. Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3577193.3593716","DOI":"10.1145\/3577193.3593716"},{"key":"35_CR15","doi-asserted-by":"publisher","unstructured":"Zhang, L., Wahib, M., Zhang, H., Matsuoka, S.: A study of single and multi-device synchronization methods in Nvidia GPUs. In: 2020 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 483\u2013493 (2020). https:\/\/doi.org\/10.1109\/IPDPS47924.2020.00057","DOI":"10.1109\/IPDPS47924.2020.00057"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2024: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-90200-0_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T12:59:06Z","timestamp":1759496346000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-90200-0_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031901997","9783031902000"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-90200-0_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"11 June 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Madrid","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}