{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:20:54Z","timestamp":1768029654485,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":26,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,3,12]],"date-time":"2016-03-12T00:00:00Z","timestamp":1457740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,3,12]]},"DOI":"10.1145\/2884045.2884049","type":"proceedings-article","created":{"date-parts":[[2016,3,4]],"date-time":"2016-03-04T20:57:50Z","timestamp":1457125070000},"page":"32-41","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Multi-stage programming for GPUs in C++ using PACXX"],"prefix":"10.1145","author":[{"given":"Michael","family":"Haidl","sequence":"first","affiliation":[{"name":"University of Muenster, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michel","family":"Steuwer","sequence":"additional","affiliation":[{"name":"University of Edinburgh, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tim","family":"Humernbrum","sequence":"additional","affiliation":[{"name":"University of Muenster, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergei","family":"Gorlatch","sequence":"additional","affiliation":[{"name":"University of Muenster, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2016,3,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AMD. Bolt C++ Template Library 2014. Version 1.2.  AMD. Bolt C++ Template Library 2014. Version 1.2."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2581122.2544165"},{"key":"e_1_3_2_1_3_1","first-page":"359","author":"Bell N.","year":"2011","unstructured":"N. Bell and J. Hoberock . Thrust: A Parallel Template Library. GPU Computing Gems Jade Edition, page 359 , 2011 . N. Bell and J. Hoberock. Thrust: A Parallel Template Library. GPU Computing Gems Jade Edition, page 359, 2011.","journal-title":"Thrust: A Parallel Template Library. GPU Computing Gems Jade Edition, page"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/954186.954190"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499370.2462166"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/LLVM-HPC.2014.9"},{"key":"e_1_3_2_1_7_1","volume-title":"Nvidia","author":"Harris M.","year":"2007","unstructured":"M. Harris . Optimizing Parallel Reduction in CUDA . Nvidia , 2007 . M. Harris. Optimizing Parallel Reduction in CUDA. Nvidia, 2007."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-024X(199606)26:6%3C635::AID-SPE26%3E3.0.CO;2-P"},{"key":"e_1_3_2_1_9_1","volume-title":"The OpenCL Specification","author":"Khronos Group","year":"2012","unstructured":"Khronos Group . The OpenCL Specification , 2012 . Khronos Group. The OpenCL Specification, 2012."},{"key":"e_1_3_2_1_10_1","volume-title":"The SPIR Specification","author":"Khronos Group","year":"2014","unstructured":"Khronos Group . The SPIR Specification , 2014 . Khronos Group. The SPIR Specification, 2014."},{"key":"e_1_3_2_1_11_1","unstructured":"Khronos Group. SYCL Specifcation 2015.  Khronos Group. SYCL Specifcation 2015."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2011.09.001"},{"key":"e_1_3_2_1_13_1","first-page":"1","volume-title":"Proceedings of the BSD Conference","author":"Lattner C.","year":"2008","unstructured":"C. Lattner . LLVM and Clang: Next Generation Compiler Technology . In Proceedings of the BSD Conference , pages 1 -- 2 , 2008 . C. Lattner. LLVM and Clang: Next Generation Compiler Technology. In Proceedings of the BSD Conference, pages 1--2, 2008."},{"key":"e_1_3_2_1_14_1","first-page":"75","volume-title":"LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation. In CGO 2004","author":"Lattner C.","year":"2004","unstructured":"C. Lattner and V. Adve . LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation. In CGO 2004 , pages 75 -- 86 . IEEE, 2004 . C. Lattner and V. Adve. LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation. In CGO 2004, pages 75--86. IEEE, 2004."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2636228.2636233"},{"key":"e_1_3_2_1_16_1","unstructured":"Nvidia. Parallel Thread Execution ISA. Version 4.3.  Nvidia. Parallel Thread Execution ISA . Version 4.3."},{"key":"e_1_3_2_1_17_1","volume-title":"CUDA Programming Guide","year":"2015","unstructured":"Nvidia. CUDA Programming Guide , 2015 . Version 7.5. Nvidia. CUDA Programming Guide, 2015. Version 7.5."},{"key":"e_1_3_2_1_18_1","unstructured":"Nvidia. CUDA Toolkit 7.5 2015.  Nvidia. CUDA Toolkit 7.5 2015."},{"issue":"1","key":"e_1_3_2_1_19_1","first-page":"677","volume":"3","author":"Nyland L.","year":"2007","unstructured":"L. Nyland , M. Harris , and J. Prins . Fast N-Body Simulation with CUDA. GPU Gems , 3 ( 1 ): 677 -- 696 , 2007 . L. Nyland, M. Harris, and J. Prins. Fast N-Body Simulation with CUDA. GPU Gems, 3(1):677--696, 2007.","journal-title":"Fast N-Body Simulation with CUDA. GPU Gems"},{"key":"e_1_3_2_1_20_1","first-page":"238","volume-title":"SNAPL 2015","volume":"32","author":"Rompf T.","year":"2015","unstructured":"T. Rompf , K. J. Brown , H. Lee , Go meta! A case for generative programming and DSLs in performance critical systems. In 1st Summit on Advances in Programming Languages , SNAPL 2015 , volume 32 of LIPIcs, pages 238 -- 261 . Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik , 2015 . T. Rompf, K. J. Brown, H. Lee, et al. Go meta! A case for generative programming and DSLs in performance critical systems. In 1st Summit on Advances in Programming Languages, SNAPL 2015, volume 32 of LIPIcs, pages 238--261. Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik, 2015."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1942788.1868314"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.269"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2584665"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-25935-0_3"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/258994.259019"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1809028.1806642"}],"event":{"name":"PPoPP '16: 21st ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","location":"Barcelona Spain","acronym":"PPoPP '16"},"container-title":["Proceedings of the 9th Annual Workshop on General Purpose Processing using Graphics Processing Unit"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2884045.2884049","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2884045.2884049","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:54:08Z","timestamp":1750222448000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2884045.2884049"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,3,12]]},"references-count":26,"alternative-id":["10.1145\/2884045.2884049","10.1145\/2884045"],"URL":"https:\/\/doi.org\/10.1145\/2884045.2884049","relation":{},"subject":[],"published":{"date-parts":[[2016,3,12]]},"assertion":[{"value":"2016-03-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}