{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,5]],"date-time":"2025-08-05T13:05:00Z","timestamp":1754399100595,"version":"3.38.0"},"reference-count":14,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010,7]]},"DOI":"10.1109\/icsamos.2010.5642066","type":"proceedings-article","created":{"date-parts":[[2010,11,30]],"date-time":"2010-11-30T21:34:08Z","timestamp":1291152848000},"page":"200-207","source":"Crossref","is-referenced-by-count":11,"title":["Compile-time GPU memory access optimizations"],"prefix":"10.1109","author":[{"given":"Gert-Jan","family":"van den Braak","sequence":"first","affiliation":[]},{"given":"Bart","family":"Mesman","sequence":"additional","affiliation":[]},{"given":"Henk","family":"Corporaal","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"OpenCL Programming Guide for the CUDA Architecture","year":"2009","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/178243.178259"},{"key":"ref12","article-title":"CUDA-lite: Reducing GPU Programming Complexity","author":"ueng","year":"2008","journal-title":"LCPC 2008"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1145\/1504176.1504194","article-title":"OpenMP to GPGPU: A Compiler Framework for Automatic Translation and Optimization","author":"lee","year":"2009","journal-title":"PPoPP &#x2018;09 Proceedings of the 14th ACM SIGPLAN symposium on Principles and practice of parallel programming"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"152","DOI":"10.1145\/1555815.1555775","article-title":"An analytical model for a GPU architecture with memory-level and thread-level parallelism awareness","volume":"37","author":"hong","year":"2009","journal-title":"SIGARCH Comput Archit News"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"journal-title":"NVIDIA CUDA Programming Guide Version 2 2","year":"2009","key":"ref3"},{"journal-title":"High Performance Compilers for Parallel Computing","year":"1996","author":"wolfe","key":"ref6"},{"journal-title":"Optimizing Matrix Transpose in CUDA","year":"2009","author":"ruetsch","key":"ref5"},{"journal-title":"PTX Parallel Thread Execution ISA Version 1 4","year":"2008","key":"ref8"},{"journal-title":"The CUDA Compiler Driver NVCC - Version 2 0","year":"2006","key":"ref7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2008.31"},{"journal-title":"Feature Extraction & Image Processing","year":"2002","author":"nixon","key":"ref1"},{"year":"2008","author":"van der laan","key":"ref9"}],"event":{"name":"2010 International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS X)","start":{"date-parts":[[2010,7,19]]},"location":"Samos, Greece","end":{"date-parts":[[2010,7,22]]}},"container-title":["2010 International Conference on Embedded Computer Systems: Architectures, Modeling and Simulation"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5628951\/5642040\/05642066.pdf?arnumber=5642066","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T09:40:46Z","timestamp":1740735646000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5642066\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,7]]},"references-count":14,"URL":"https:\/\/doi.org\/10.1109\/icsamos.2010.5642066","relation":{},"subject":[],"published":{"date-parts":[[2010,7]]}}}