{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T03:47:13Z","timestamp":1772164033056,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,2,10]],"date-time":"2018-02-10T00:00:00Z","timestamp":1518220800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"U.S. National Science Foundation","award":["1440749, 1513120"],"award-info":[{"award-number":["1440749, 1513120"]}]},{"name":"U.S. Department of Energy Exascale Computing Project","award":["17-SC-20-SC"],"award-info":[{"award-number":["17-SC-20-SC"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,2,10]]},"DOI":"10.1145\/3178487.3178500","type":"proceedings-article","created":{"date-parts":[[2018,2,6]],"date-time":"2018-02-06T13:12:23Z","timestamp":1517922743000},"page":"168-182","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":36,"title":["Register optimizations for stencils on GPUs"],"prefix":"10.1145","author":[{"given":"Prashant Singh","family":"Rawat","sequence":"first","affiliation":[{"name":"The Ohio State University"}]},{"given":"Fabrice","family":"Rastello","sequence":"additional","affiliation":[{"name":"INRIA"}]},{"given":"Aravind","family":"Sukumaran-Rajam","sequence":"additional","affiliation":[{"name":"The Ohio State University"}]},{"given":"Louis-No\u00ebl","family":"Pouchet","sequence":"additional","affiliation":[{"name":"Colorado State University"}]},{"given":"Atanas","family":"Rountev","sequence":"additional","affiliation":[{"name":"The Ohio State University"}]},{"given":"P.","family":"Sadayappan","sequence":"additional","affiliation":[{"name":"The Ohio State University"}]}],"member":"320","published-online":{"date-parts":[[2018,2,10]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Compilers: Principles, Techniques, and Tools","author":"Aho A.","year":"2007","unstructured":"A. Aho , M. Lam , R. Sethi , and J. Ullman . 2007 . Compilers: Principles, Techniques, and Tools ( 2 nd ed). Pearson . A. Aho, M. Lam, R. Sethi, and J. Ullman. 2007. Compilers: Principles, Techniques, and Tools (2nd ed). Pearson.","edition":"2"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/800116.803770"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/321992.322001"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1002\/spe.4380170607"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.5555\/2388996.2389051"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.103"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/645676.663774"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375595"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/143095.143143"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/177492.177575"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/800230.806984"},{"key":"e_1_3_2_2_12_1","volume-title":"Proceedings 2001 International Conference on Parallel Architectures and Compilation Techniques. 175--184","author":"Codina J. M.","unstructured":"J. M. Codina , J. Sanchez , and A. Gonzalez . 2001. A unified modulo scheduling and register allocation technique for clustered processors . In Proceedings 2001 International Conference on Parallel Architectures and Compilation Techniques. 175--184 . J. M. Codina, J. Sanchez, and A. Gonzalez. 2001. A unified modulo scheduling and register allocation technique for clustered processors. In Proceedings 2001 International Conference on Parallel Architectures and Compilation Techniques. 175--184."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2038698.2038708"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/1882792.1882852"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/377792.377807"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2892208.2892219"},{"key":"e_1_3_2_2_17_1","volume-title":"Turbulence: Proxy App Software. https:\/\/exactcodesign.org\/proxy-app-software\/.","author":"CT","year":"2013","unstructured":"Exa CT 2013. Exa CT : Center for Exascale Simulation of Combustion in Turbulence: Proxy App Software. https:\/\/exactcodesign.org\/proxy-app-software\/. ( 2013 ). ExaCT 2013. ExaCT: Center for Exascale Simulation of Combustion in Turbulence: Proxy App Software. https:\/\/exactcodesign.org\/proxy-app-software\/. (2013)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2004.840301"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1356052.1356053"},{"key":"e_1_3_2_2_20_1","volume-title":"Proceedings of the 15th International Parallel & Distributed Processing Symposium (IPDPS '01)","author":"Govindarajan Ramaswamy","unstructured":"Ramaswamy Govindarajan , H. Yang , Chihong Zhang , Jos\u00e9 N. Amaral , and Guang R. Gao . 2001. Minimum Register Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs . In Proceedings of the 15th International Parallel & Distributed Processing Symposium (IPDPS '01) . IEEE Computer Society, Washington, DC, USA, 26--33. Ramaswamy Govindarajan, H. Yang, Chihong Zhang, Jos\u00e9 N. Amaral, and Guang R. Gao. 2001. Minimum Register Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs. In Proceedings of the 15th International Parallel & Distributed Processing Symposium (IPDPS '01). IEEE Computer Society, Washington, DC, USA, 26--33."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2581122.2544160"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751223"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-13374-9_4"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988336.2988355"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2464996.2467268"},{"key":"e_1_3_2_2_26_1","unstructured":"HPGMG 2016. High-Performance Geometric Multigrid. https:\/\/hpgmg.org\/. (2016).  HPGMG 2016. High-Performance Geometric Multigrid. https:\/\/hpgmg.org\/. (2016)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/800076.802486"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2903150.2903158"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1133981.1134006"},{"key":"e_1_3_2_2_30_1","volume-title":"FFT Compiler Techniques. In Compiler Construction: 13th International Conference, CC 2004. Springer Berlin Heidelberg, 217--231","author":"Kral Stefan","year":"2004","unstructured":"Stefan Kral , Franz Franchetti , Juergen Lorenz , Christoph W. Ueberhuber , and Peter Wurzinger . 2004 . FFT Compiler Techniques. In Compiler Construction: 13th International Conference, CC 2004. Springer Berlin Heidelberg, 217--231 . Stefan Kral, Franz Franchetti, Juergen Lorenz, Christoph W. Ueberhuber, and Peter Wurzinger. 2004. FFT Compiler Techniques. In Compiler Construction: 13th International Conference, CC 2004. Springer Berlin Heidelberg, 217--231."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/977395.977673"},{"key":"e_1_3_2_2_32_1","volume-title":"Automation Test in Europe Conference Exhibition (DATE). 1273--1278","author":"Li A.","unstructured":"A. Li , S. L. Song , A. Kumar , E. Z. Zhang , D. Chavarr\u00eda-Miranda , and H. Corp oraal . 2016. Critical points based register-concurrency auto-tuning for GPUs. In 2016 Design , Automation Test in Europe Conference Exhibition (DATE). 1273--1278 . A. Li, S. L. Song, A. Kumar, E. Z. Zhang, D. Chavarr\u00eda-Miranda, and H. Corporaal. 2016. Critical points based register-concurrency auto-tuning for GPUs. In 2016 Design, Automation Test in Europe Conference Exhibition (DATE). 1273--1278."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/353926.353929"},{"key":"e_1_3_2_2_34_1","volume-title":"Linear Scan Register Allocation in the Context of SSA Form and Register Constraints","author":"M\u00f6ssenb\u00f6ck Hanspeter","unstructured":"Hanspeter M\u00f6ssenb\u00f6ck and Michael Pfeiffer . 2002. Linear Scan Register Allocation in the Context of SSA Form and Register Constraints . Springer Berlin Heidelberg , 229--246. Hanspeter M\u00f6ssenb\u00f6ck and Michael Pfeiffer. 2002. Linear Scan Register Allocation in the Context of SSA Form and Register Constraints. Springer Berlin Heidelberg, 229--246."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694364"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/169627.169839"},{"key":"e_1_3_2_2_38_1","unstructured":"NVCC 2017. NVIDIA CUDA Compiler Driver NVCC. docs.nvidia.com\/cuda\/cuda-compiler-driver-nvcc. (2017).  NVCC 2017. NVIDIA CUDA Compiler Driver NVCC. docs.nvidia.com\/cuda\/cuda-compiler-driver-nvcc. (2017)."},{"key":"e_1_3_2_2_39_1","unstructured":"NVprof 2017. NVIDIA Profiler. http:\/\/docs.nvidia.com\/cuda\/profiler-users-guide. (2017).  NVprof 2017. NVIDIA Profiler. http:\/\/docs.nvidia.com\/cuda\/profiler-users-guide. (2017)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/155090.155114"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/330249.330250"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/1375581.1375609"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2716282.2716290"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2967938.2967967"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669123"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.5555\/1759937.1759950"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/321607.321620"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/996841.996875"},{"issue":"3","key":"e_1_3_2_2_49_1","first-page":"3","article-title":"Using The GNU Compiler Collection","volume":"4","author":"Stallman Richard M.","year":"2009","unstructured":"Richard M. Stallman and GCC Developer Community . 2009 . Using The GNU Compiler Collection : A GNU Manual For GCC Version 4 . 3 . 3 . CreateSpace, Paramount, CA. Richard M. Stallman and GCC Developer Community. 2009. Using The GNU Compiler Collection: A GNU Manual For GCC Version 4.3.3. CreateSpace, Paramount, CA.","journal-title":"A GNU Manual For GCC Version"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/2594291.2594342"},{"key":"e_1_3_2_2_51_1","unstructured":"SW4 2014. Seismic Wave Modelling (SW4) - Computational Infrastructure for Geodynamics. https:\/\/geodynamics.org\/cig\/software\/sw4\/. (2014).  SW4 2014. Seismic Wave Modelling (SW4) - Computational Infrastructure for Geodynamics. https:\/\/geodynamics.org\/cig\/software\/sw4\/. (2014)."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1142\/S012962640400188X"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-28652-0_2"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.21"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749246.2749255"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/192724.192734"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2854038.2854041"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830813"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0129626497000401"}],"event":{"name":"PPoPP '18: 23nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","location":"Vienna Austria","acronym":"PPoPP '18","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3178487.3178500","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3178487.3178500","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:39:08Z","timestamp":1750196348000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3178487.3178500"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,2,10]]},"references-count":58,"alternative-id":["10.1145\/3178487.3178500","10.1145\/3178487"],"URL":"https:\/\/doi.org\/10.1145\/3178487.3178500","relation":{"is-identical-to":[{"id-type":"doi","id":"10.1145\/3200691.3178500","asserted-by":"object"}]},"subject":[],"published":{"date-parts":[[2018,2,10]]},"assertion":[{"value":"2018-02-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}