{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T12:16:46Z","timestamp":1763468206129,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2014,8,24]],"date-time":"2014-08-24T00:00:00Z","timestamp":1408838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000144","name":"Division of Computer and Network Systems","doi-asserted-by":"publisher","award":["CNS-0964478"],"award-info":[{"award-number":["CNS-0964478"]}],"id":[{"id":"10.13039\/100000144","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["SHF-1217917"],"award-info":[{"award-number":["SHF-1217917"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2014,8,24]]},"DOI":"10.1145\/2628071.2628072","type":"proceedings-article","created":{"date-parts":[[2014,8,21]],"date-time":"2014-08-21T12:19:23Z","timestamp":1408623563000},"page":"431-442","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["D\n            <sup>2<\/sup>\n            MA"],"prefix":"10.1145","author":[{"given":"D. Anoushe","family":"Jamshidi","sequence":"first","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]},{"given":"Mehrzad","family":"Samadi","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]},{"given":"Scott","family":"Mahlke","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI, USA"}]}],"member":"320","published-online":{"date-parts":[[2014,8,24]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"\"GPGPU-Sim \" http:\/\/gpgpu-sim.org.  \"GPGPU-Sim \" http:\/\/gpgpu-sim.org."},{"key":"e_1_3_2_1_2_1","unstructured":"\"NVIDIA GPU Computing SDK \" http:\/\/developer.nvidia.com\/gpu-computing-sdk.  \"NVIDIA GPU Computing SDK \" http:\/\/developer.nvidia.com\/gpu-computing-sdk."},{"key":"e_1_3_2_1_3_1","first-page":"163","volume-title":"Apr. 2009","author":"Bakhoda A.","unstructured":"A. Bakhoda , G. L. Yuan , W. W. L. Fung , H. Wong , and T. M. Aamodt , \" Analyzing CUDA workloads using a detailed GPU simulator,\" in Proc. of the 2009 IEEE Symposium on Performance Analysis of Systems and Software , Apr. 2009 , pp. 163 -- 174 . A. Bakhoda, G. L. Yuan, W. W. L. Fung, H. Wong, and T. M. Aamodt, \"Analyzing CUDA workloads using a detailed GPU simulator,\" in Proc. of the 2009 IEEE Symposium on Performance Analysis of Systems and Software, Apr. 2009, pp. 163--174."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063400"},{"key":"e_1_3_2_1_5_1","first-page":"1","volume-title":"Hardware implementation of micropolygon rasterization with motion and defocus blur,\" in Proceedings of the 2010 Conference on High Performance Graphics","author":"Brunhaver J. S.","year":"2010","unstructured":"J. S. Brunhaver , K. Fatahalian , and P. Hanrahan , \" Hardware implementation of micropolygon rasterization with motion and defocus blur,\" in Proceedings of the 2010 Conference on High Performance Graphics , 2010 , pp. 1 -- 9 . J. S. Brunhaver, K. Fatahalian, and P. Hanrahan, \"Hardware implementation of micropolygon rasterization with motion and defocus blur,\" in Proceedings of the 2010 Conference on High Performance Graphics, 2010, pp. 1--9."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2010.5650274"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.515.0559"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000093"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.18"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1950365.1950409"},{"key":"e_1_3_2_1_12_1","volume-title":"The Art of Electronics","author":"Horowitz P.","year":"1991","unstructured":"P. Horowitz and W. Hill , The Art of Electronics , Second Edition. Cambridge University Press , 1991 . P. Horowitz and W. Hill, The Art of Electronics, Second Edition. Cambridge University Press, 1991."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485951"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451158"},{"key":"e_1_3_2_1_15_1","first-page":"157","volume-title":"Neither more nor less: Optimizing thread-level parallelism for gpgpus,\" in Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques","author":"Kayiran O.","year":"2013","unstructured":"O. Kayiran , A. Jog , M. T. Kandemir , and C. R. Das , \" Neither more nor less: Optimizing thread-level parallelism for gpgpus,\" in Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques , 2013 , pp. 157 -- 166 . O. Kayiran, A. Jog, M. T. Kandemir, and C. R. Das, \"Neither more nor less: Optimizing thread-level parallelism for gpgpus,\" in Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques, 2013, pp. 157--166."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"e_1_3_2_1_17_1","unstructured":"KHRONOS Group \"OpenCL - the open standard for parallel programming of heterogeneous systems \" 2013. {Online}. Available: http:\/\/www.khronos.org  KHRONOS Group \"OpenCL - the open standard for parallel programming of heterogeneous systems \" 2013. {Online}. Available: http:\/\/www.khronos.org"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2011.32"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.44"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"volume-title":"Nvidia's next generation CUDA compute architecture","year":"2009","key":"e_1_3_2_1_21_1","unstructured":"NVIDIA, \"Fermi : Nvidia's next generation CUDA compute architecture ,\" 2009 , http:\/\/www.nvidia.com\/content\/PDF\/fermi_white_papers\/NVIDIA_Fermi_Compute_Architecture_Whitepaper.pdf. NVIDIA, \"Fermi: Nvidia's next generation CUDA compute architecture,\" 2009, http:\/\/www.nvidia.com\/content\/PDF\/fermi_white_papers\/NVIDIA_Fermi_Compute_Architecture_Whitepaper.pdf."},{"key":"e_1_3_2_1_22_1","unstructured":"CUDA C Programming Guide NVIDIA Oct. 2012.  CUDA C Programming Guide NVIDIA Oct. 2012."},{"key":"e_1_3_2_1_23_1","volume-title":"Kepler GK110","author":"CUDA","year":"2012","unstructured":"NVIDIA, \"NVIDIA's next generation CUDA compute architecture : Kepler GK110 ,\" 2012 , www.nvidia.com\/content\/PDF\/NVIDIA_Kepler_GK110_Architecture_Whitepaper.pdf. NVIDIA, \"NVIDIA's next generation CUDA compute architecture: Kepler GK110,\" 2012, www.nvidia.com\/content\/PDF\/NVIDIA_Kepler_GK110_Architecture_Whitepaper.pdf."},{"key":"e_1_3_2_1_24_1","volume-title":"NVIDIA geforce GTX 680","author":"CUDA","year":"2012","unstructured":"NVIDIA, \"NVIDIA's next generation CUDA compute architecture : NVIDIA geforce GTX 680 ,\" 2012 , http:\/\/www.geforce.com\/Active\/en_US\/en_US\/pdf\/GeForce-GTX-680-Whitepaper-FINAL.pdf. ___, \"Whitepaper: NVIDIA geforce GTX 680,\" 2012, http:\/\/www.geforce.com\/Active\/en_US\/en_US\/pdf\/GeForce-GTX-680-Whitepaper-FINAL.pdf."},{"key":"e_1_3_2_1_25_1","volume-title":"A new era in mobile computing","author":"CUDA","year":"2014","unstructured":"NVIDIA, \"NVIDIA's next generation CUDA compute architecture : A new era in mobile computing ,\" 2014 , http:\/\/www.nvidia.com\/content\/PDF\/tegra_white_papers\/tegra-K1-whitepaper.pdf. ___, \"NVIDIA Tegra K1: A new era in mobile computing,\" 2014, http:\/\/www.nvidia.com\/content\/PDF\/tegra_white_papers\/tegra-K1-whitepaper.pdf."},{"volume-title":"Built for science","year":"2012","key":"e_1_3_2_1_26_1","unstructured":"Oak Ridge Leadership Computing Facility, \"Titan : Built for science ,\" 2012 , http:\/\/www.olcf.ornl.gov\/wp-content\/themes\/olcf\/titan\/Titan_BuiltForScience.pdf. Oak Ridge Leadership Computing Facility, \"Titan: Built for science,\" 2012, http:\/\/www.olcf.ornl.gov\/wp-content\/themes\/olcf\/titan\/Titan_BuiltForScience.pdf."},{"key":"e_1_3_2_1_27_1","volume-title":"Computer Organization and Design: The Hardware\/Software Interface","author":"Patterson D. A.","year":"2012","unstructured":"D. A. Patterson and J. L. Hennessy , Computer Organization and Design: The Hardware\/Software Interface , Fourth Edition. Elsevier , 2012 . D. A. Patterson and J. L. Hennessy, Computer Organization and Design: The Hardware\/Software Interface, Fourth Edition. Elsevier, 2012."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540747"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540717"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_31_1","first-page":"73","volume-title":"APOGEE: Adaptive prefetching on GPUs for energy efficiency,\" in Proc. of the 22nd International Conference on Parallel Architectures and Compilation Techniques","author":"Sethia A.","year":"2013","unstructured":"A. Sethia , G. Dasika , M. Samadi , and S. Mahlke , \" APOGEE: Adaptive prefetching on GPUs for energy efficiency,\" in Proc. of the 22nd International Conference on Parallel Architectures and Compilation Techniques , 2013 , pp. 73 -- 82 . A. Sethia, G. Dasika, M. Samadi, and S. Mahlke, \"APOGEE: Adaptive prefetching on GPUs for energy efficiency,\" in Proc. of the 22nd International Conference on Parallel Architectures and Compilation Techniques, 2013, pp. 73--82."}],"event":{"name":"PACT '14: International Conference on Parallel Architectures and Compilation","sponsor":["IFIP WG 10.3 IFIP WG 10.3","SIGARCH ACM Special Interest Group on Computer Architecture","IEEE CS TCPP IEEE Computer Society Technical Committee on Parallel Processing","IEEE CS TCAA IEEE CS technical committee on architectural acoustics"],"location":"Edmonton AB Canada","acronym":"PACT '14"},"container-title":["Proceedings of the 23rd international conference on Parallel architectures and compilation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2628071.2628072","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2628071.2628072","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T07:19:38Z","timestamp":1750231178000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2628071.2628072"}},"subtitle":["accelerating coarse-grained data transfer for GPUs"],"short-title":[],"issued":{"date-parts":[[2014,8,24]]},"references-count":31,"alternative-id":["10.1145\/2628071.2628072","10.1145\/2628071"],"URL":"https:\/\/doi.org\/10.1145\/2628071.2628072","relation":{},"subject":[],"published":{"date-parts":[[2014,8,24]]},"assertion":[{"value":"2014-08-24","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}