{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,24]],"date-time":"2025-08-24T01:27:25Z","timestamp":1755998845835,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031125966"},{"type":"electronic","value":"9783031125973"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-12597-3_21","type":"book-chapter","created":{"date-parts":[[2022,7,31]],"date-time":"2022-07-31T10:02:21Z","timestamp":1659261741000},"page":"335-349","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Generating Work Efficient Scan Implementations for\u00a0GPUs the Functional Way"],"prefix":"10.1007","author":[{"given":"Federico","family":"Pizzuti","sequence":"first","affiliation":[]},{"given":"Michel","family":"Steuwer","sequence":"additional","affiliation":[]},{"given":"Christophe","family":"Dubach","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,1]]},"reference":[{"issue":"6","key":"21_CR1","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1145\/1542476.1542481","volume":"44","author":"J Ansel","year":"2009","unstructured":"Ansel, J., et al.: Petabricks: a language and compiler for algorithmic choice. ACM SIGPLAN Notices 44(6), 38\u201349 (2009). https:\/\/doi.org\/10.1145\/1542476.1542481","journal-title":"ACM SIGPLAN Notices"},{"issue":"11","key":"21_CR2","doi-asserted-by":"publisher","first-page":"1526","DOI":"10.1109\/12.42122","volume":"38","author":"GE Blelloch","year":"1989","unstructured":"Blelloch, G.E.: Scans as primitive parallel operations. IEEE Trans. Comput. 38(11), 1526\u20131538 (1989). https:\/\/doi.org\/10.1109\/12.42122","journal-title":"IEEE Trans. Comput."},{"key":"21_CR3","doi-asserted-by":"publisher","unstructured":"Chakravarty, M.M.T., Keller, G., Lee, S., McDonell, T.L., Grover, V.: Accelerating Haskell array codes with multicore GPUs. In: POPL-DAMP (2011). https:\/\/doi.org\/10.1145\/1926354.1926358","DOI":"10.1145\/1926354.1926358"},{"key":"21_CR4","doi-asserted-by":"publisher","unstructured":"Elliott, C.: Generic functional parallel algorithms: Scan and FFT. In: Proceedings of the ACM on Programming Languages, vol. 1, no. ICFP, pp. 1\u201325 (2017). https:\/\/doi.org\/10.1145\/3110251","DOI":"10.1145\/3110251"},{"key":"21_CR5","doi-asserted-by":"publisher","unstructured":"Hagedorn, B., Lenfers, J., Koehler, T., Gorlatch, S., Steuwer, M.: A language for describing optimization strategies. arXiv preprint arXiv:2002.02268 (2020). https:\/\/doi.org\/10.48550\/arXiv.2002.02268","DOI":"10.48550\/arXiv.2002.02268"},{"key":"21_CR6","doi-asserted-by":"publisher","unstructured":"Hagedorn, B., Stoltzfus, L., Steuwer, M., Gorlatch, S., Dubach, C.: High performance stencil code generation with lift. In: CGO (2018). https:\/\/doi.org\/10.48550\/arXiv.2002.02268","DOI":"10.48550\/arXiv.2002.02268"},{"issue":"39","key":"21_CR7","first-page":"851","volume":"3","author":"M Harris","year":"2007","unstructured":"Harris, M., Sengupta, S., Owens, J.D.: Parallel prefix sum (SCAN) with CUDA. GPU Gems 3(39), 851\u2013876 (2007)","journal-title":"GPU Gems"},{"key":"21_CR8","doi-asserted-by":"publisher","unstructured":"Henriksen, T., Serup, N.G.W., Elsman, M., Henglein, F., Oancea, C.E.: Futhark: purely functional GPU-programming with nested parallelism and in-place array updates. In: PLDI (2017). https:\/\/doi.org\/10.1145\/3062341.3062354","DOI":"10.1145\/3062341.3062354"},{"key":"21_CR9","doi-asserted-by":"publisher","unstructured":"Hinze, R.: An algebra of scans. In: International Conference on Mathematics of Program Construction, pp. 186\u2013210 (2004). https:\/\/doi.org\/10.1007\/978-3-540-27764-4_11","DOI":"10.1007\/978-3-540-27764-4_11"},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Koehler, T., Steuwer, M.: Towards a domain-extensible compiler: optimizing an image processing pipeline on mobile CPUs. In: CGO (2021)","DOI":"10.1109\/CGO51591.2021.9370337"},{"key":"21_CR11","doi-asserted-by":"publisher","unstructured":"McCool, M., Reinders, J., Robison, A.: Structured Parallel Programming: Patterns for Efficient Computation. Elsevier, Amsterdam (2012). https:\/\/doi.org\/10.1145\/2382756.2382773","DOI":"10.1145\/2382756.2382773"},{"key":"21_CR12","unstructured":"Merrill, D., Garland, M.: Single-pass parallel prefix scan with decoupled look-back. Technical report NVR-2016-002, NVIDIA (2016)"},{"key":"21_CR13","doi-asserted-by":"publisher","unstructured":"Morita, K., Morihata, A., Matsuzaki, K., Hu, Z., Takeichi, M.: Automatic inversion generates divide-and-conquer parallel programs. In: PLDI (2007). https:\/\/doi.org\/10.1145\/1273442.1250752","DOI":"10.1145\/1273442.1250752"},{"key":"21_CR14","doi-asserted-by":"publisher","unstructured":"Pizzuti, F., Steuwer, M., Dubach, C.: Generating fast sparse matrix vector multiplication from a high level generic functional IR. In: CC (2020). https:\/\/doi.org\/10.1145\/3377555.3377896","DOI":"10.1145\/3377555.3377896"},{"key":"21_CR15","doi-asserted-by":"publisher","unstructured":"Pizzuti, F., Steuwer, M., Dubach, C.: Artifact and instruction for replication of experiments in the Europar \u201922 paper titled: \u201cgenerating work efficient scan implementations for GPUs the functional way\u201d (2022). https:\/\/doi.org\/10.6084\/m9.figshare.19980176","DOI":"10.6084\/m9.figshare.19980176"},{"key":"21_CR16","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840306","author":"M Puschel","year":"2005","unstructured":"Puschel, M., et al.: Spiral: code generation for DSP transforms. Proc. IEEE (2005). https:\/\/doi.org\/10.1109\/JPROC.2004.840306","journal-title":"Proc. IEEE"},{"key":"21_CR17","doi-asserted-by":"publisher","unstructured":"Ragan-Kelley, J., Barnes, C., Adams, A., Paris, S., Durand, F., Amarasinghe, S.P.: Halide: a language and compiler for optimizing parallelism, locality, and recomputation in image processing pipelines. In: PLDI (2013). https:\/\/doi.org\/10.1145\/2499370.2462176","DOI":"10.1145\/2499370.2462176"},{"key":"21_CR18","doi-asserted-by":"publisher","DOI":"10.1017\/S0956796802004458","author":"S Scholz","year":"2003","unstructured":"Scholz, S.: Single assignment C: efficient support for high-level array operations in a functional setting. J. Funct. Program. (2003). https:\/\/doi.org\/10.1017\/S0956796802004458","journal-title":"J. Funct. Program."},{"key":"21_CR19","unstructured":"Sengupta, S., Harris, M., Garland, M., et al.: Efficient parallel scan algorithms for gpus. Technical report NVR-2008-003, NVIDIA, Santa Clara, CA, 1(1), 1\u201317 (2008)"},{"key":"21_CR20","doi-asserted-by":"publisher","unstructured":"Steuwer, M., Fensch, C., Lindley, S., Dubach, C.: Generating performance portable code using rewrite rules: from high-level functional expressions to high-performance OpenCL code. In: Fisher, K., Reppy, J.H. (eds.) ICFP (2015). https:\/\/doi.org\/10.1145\/2784731.2784754","DOI":"10.1145\/2784731.2784754"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Steuwer, M., Remmelg, T., Dubach, C.: Lift: a functional data-parallel IR for high-performance GPU code generation. In: CGO (2017)","DOI":"10.1109\/CGO.2017.7863730"},{"issue":"5","key":"21_CR22","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1145\/378795.378860","volume":"36","author":"J Xiong","year":"2001","unstructured":"Xiong, J., Johnson, J., Johnson, R., Padua, D.: SPL: a language and compiler for DSP algorithms. ACM SIGPLAN Not. 36(5), 298\u2013308 (2001). https:\/\/doi.org\/10.1145\/378795.378860","journal-title":"ACM SIGPLAN Not."}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2022: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-12597-3_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,11]],"date-time":"2022-08-11T23:13:56Z","timestamp":1660259636000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-12597-3_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031125966","9783031125973"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-12597-3_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"1 August 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 August 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2022.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"102","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.97","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}