{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T17:16:59Z","timestamp":1764350219764,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319099668"},{"type":"electronic","value":"9783319099675"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-09967-5_8","type":"book-chapter","created":{"date-parts":[[2014,9,30]],"date-time":"2014-09-30T15:10:04Z","timestamp":1412089804000},"page":"136-151","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Parametric GPU Code Generation for Affine Loop Programs"],"prefix":"10.1007","author":[{"given":"Athanasios","family":"Konstantinidis","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Paul H. J.","family":"Kelly","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J.","family":"Ramanujam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"P.","family":"Sadayappan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,10,1]]},"reference":[{"key":"8_CR1","volume-title":"In: Compilers: Principles, Techniques, and Tools","author":"A Aho","year":"2007","unstructured":"Aho, A., Lam, M., Sethi, R., Ullman, J.: Optimizing for parallelism and locality. In: Compilers: Principles, Techniques, and Tools. Pearson\/Addison Wesley, Boston (2007)"},{"issue":"4","key":"8_CR2","doi-asserted-by":"publisher","first-page":"491","DOI":"10.1145\/29873.29875","volume":"9","author":"R Allen","year":"1987","unstructured":"Allen, R., Kennedy, K.: Automatic translation of fortran programs to vector form. ACM Trans. Program. Lang. Syst. (TOPLAS) 9(4), 491\u2013542 (1987)","journal-title":"ACM Trans. Program. Lang. Syst. (TOPLAS)"},{"doi-asserted-by":"crossref","unstructured":"Ancourt, C., Irigoin, F.: Scanning polyhedra with DO loops. In: ACM Sigplan Notices, vol. 26, pp. 39\u201350. ACM (1991)","key":"8_CR3","DOI":"10.1145\/109626.109631"},{"doi-asserted-by":"crossref","unstructured":"Baskaran, M.M., Hartono, A., Tavarageri, S., Henretty, T., Ramanujam, J., Sadayappan, P.: Parameterized tiling revisited. In: CGO. ACM (2010)","key":"8_CR4","DOI":"10.1145\/1772954.1772983"},{"key":"8_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1007\/978-3-642-11970-5_14","volume-title":"Compiler Construction","author":"MM Baskaran","year":"2010","unstructured":"Baskaran, M.M., Ramanujam, J., Sadayappan, P.: Automatic C-to-CUDA code generation for affine programs. In: Gupta, R. (ed.) CC 2010. LNCS, vol. 6011, pp. 244\u2013263. Springer, Heidelberg (2010)"},{"unstructured":"Bastoul, C.: Code generation in the polyhedral model is easier than you think. In: PACT (2004)","key":"8_CR6"},{"key":"8_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"320","DOI":"10.1007\/3-540-36579-6_23","volume-title":"Compiler Construction","author":"C Bastoul","year":"2003","unstructured":"Bastoul, C., Feautrier, P.: Improving data locality by chunking. In: Hedin, G. (ed.) CC 2003. LNCS, vol. 2622, pp. 320\u2013334. Springer, Heidelberg (2003)"},{"doi-asserted-by":"crossref","unstructured":"Bondhugula, U., Hartono, A., Ramanujam, J., Sadayappan, P.: A practical automatic polyhedral parallelizer and locality optimizer. In: PLDI. ACM (2008)","key":"8_CR8","DOI":"10.1145\/1375581.1375595"},{"issue":"5","key":"8_CR9","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1007\/BF01407835","volume":"21","author":"P Feautrier","year":"1992","unstructured":"Feautrier, P.: Some efficient solutions to the affine scheduling problem. Part i. One-dimensional time. Int. J. Parallel Prog. 21(5), 313\u2013347 (1992)","journal-title":"Int. J. Parallel Prog."},{"issue":"6","key":"8_CR10","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1007\/BF01379404","volume":"21","author":"P Feautrier","year":"1992","unstructured":"Feautrier, P.: Some efficient solutions to the affine scheduling problem. Part II. Multidimensional time. Int. J. Parallel Prog. 21(6), 389\u2013420 (1992)","journal-title":"Int. J. Parallel Prog."},{"doi-asserted-by":"crossref","unstructured":"Grosser, T., Cohen, A., Kelly, P.H., Ramanujam, J., Sadayappan, P., Verdoolaege, S.: Split tiling for GPUs: automatic parallelization using trapezoidal tiles. In: GPGPU. ACM (2013)","key":"8_CR11","DOI":"10.1145\/2458523.2458526"},{"doi-asserted-by":"crossref","unstructured":"Hartono, A., Baskaran, M.M., Bastoul, C., Cohen, A., Krishnamoorthy, S., Norris, B., Ramanujam, J., Sadayappan, P.: Parametric multi-level tiling of imperfectly nested loops. In: Supercomputing, pp. 147\u2013157. ACM (2009)","key":"8_CR12","DOI":"10.1145\/1542275.1542301"},{"doi-asserted-by":"crossref","unstructured":"Hartono, A., Baskaran, M.M., Ramanujam, J., Sadayappan, P.: DynTile: parametric tiled loop generation for parallel execution on multicore processors. In: IPDPS. IEEE (2010)","key":"8_CR13","DOI":"10.1109\/IPDPS.2010.5470459"},{"doi-asserted-by":"crossref","unstructured":"Holewinski, J., Pouchet, L.N., Sadayappan, P.: High-performance code generation for stencil computations on GPU architectures. In: Proceedings of the 26th ACM International Conference on Supercomputing, pp. 311\u2013320. ACM (2012)","key":"8_CR14","DOI":"10.1145\/2304576.2304619"},{"doi-asserted-by":"crossref","unstructured":"Irigoin, F., Triolet, R.: Supernode partitioning. In: POPL. ACM (1988)","key":"8_CR15","DOI":"10.1145\/73560.73588"},{"unstructured":"Kim, D., Rajopadhye, S.: Parameterized Tiling for Imperfectly Nested Loops","key":"8_CR16"},{"doi-asserted-by":"crossref","unstructured":"Kim, D., Renganarayanan, L., Rostron, D., Rajopadhye, S., Strout, M.M.: Multi-level tiling: M for the price of one. In: Proceedings of the 2007 ACM\/IEEE Conference on Supercomputing, p. 51. ACM (2007)","key":"8_CR17","DOI":"10.1145\/1362622.1362691"},{"doi-asserted-by":"crossref","unstructured":"Krishnamoorthy, S., Baskaran, M., Bondhugula, U., Ramanujam, J., Rountev, A., Sadayappan, P.: Effective automatic parallelization of stencil computations. In: ACM Sigplan Notices, vol. 42, pp. 235\u2013244. ACM (2007)","key":"8_CR18","DOI":"10.1145\/1273442.1250761"},{"doi-asserted-by":"crossref","unstructured":"Meng, J., Skadron, K.: Performance modeling and automatic ghost zone optimization for iterative stencil loops on GPUs. In: Supercomputing. ACM (2009)","key":"8_CR19","DOI":"10.1145\/1542275.1542313"},{"issue":"6","key":"8_CR20","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1145\/1273442.1250780","volume":"42","author":"L Renganarayanan","year":"2007","unstructured":"Renganarayanan, L., Kim, D., Rajopadhye, S., Strout, M.M.: Parameterized tiled loops for free. ACM SIGPLAN Not. 42(6), 405\u2013414 (2007)","journal-title":"ACM SIGPLAN Not."},{"key":"8_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"136","DOI":"10.1007\/978-3-642-19595-2_10","volume-title":"Languages and Compilers for Parallel Computing","author":"G Rudy","year":"2011","unstructured":"Rudy, G., Khan, M.M., Hall, M., Chen, C., Chame, J.: A programming language interface to describe transformations and code generation. In: Cooper, K., Mellor-Crummey, J., Sarkar, V. (eds.) LCPC 2010. LNCS, vol. 6548, pp. 136\u2013150. Springer, Heidelberg (2011)"},{"unstructured":"Ruetsch, G., Micikevicius, P.: Optimizing matrix transpose in CUDA. NVIDIA CUDA SDK Application Note (2009)","key":"8_CR22"},{"key":"8_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"299","DOI":"10.1007\/978-3-642-15582-6_49","volume-title":"Mathematical Software \u2013 ICMS 2010","author":"S Verdoolaege","year":"2010","unstructured":"Verdoolaege, S.: An integer set library for the polyhedral model. In: Fukuda, K., Hoeven, J., Joswig, M., Takayama, N. (eds.) ICMS 2010. LNCS, vol. 6327, pp. 299\u2013302. Springer, Heidelberg (2010)"},{"issue":"4","key":"8_CR24","first-page":"54","volume":"9","author":"S Verdoolaege","year":"2013","unstructured":"Verdoolaege, S., Juega, J.C., Cohen, A., G\u00f3mez, J.I., Tenllado, C., Catthoor, F.: Polyhedral parallel code generation for CUDA. ACM Trans. Archit. Code Optim. (TACO) 9(4), 54 (2013)","journal-title":"ACM Trans. Archit. Code Optim. (TACO)"},{"issue":"4","key":"8_CR25","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF01407876","volume":"15","author":"M Wolfe","year":"1986","unstructured":"Wolfe, M.: Loops skewing: the wavefront method revisited. Int. J. Parallel Prog. 15(4), 279\u2013293 (1986)","journal-title":"Int. J. Parallel Prog."},{"doi-asserted-by":"crossref","unstructured":"Wolfe, M.: More iteration space tiling. In: Proceedings of the 1989 ACM\/IEEE Conference on Supercomputing, pp. 655\u2013664. ACM (1989)","key":"8_CR26","DOI":"10.1145\/76263.76337"},{"doi-asserted-by":"crossref","unstructured":"Yang, Y., Xiang, P., Kong, J., Zhou, H.: A GPGPU compiler for memory optimization and parallelism management. In: ACM Sigplan Notices, vol. 45, pp. 86\u201397. ACM (2010)","key":"8_CR27","DOI":"10.1145\/1809028.1806606"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-09967-5_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T11:01:28Z","timestamp":1676890888000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-09967-5_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319099668","9783319099675"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-09967-5_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"1 October 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}