{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T03:34:40Z","timestamp":1769830480277,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2015,2,7]],"date-time":"2015-02-07T00:00:00Z","timestamp":1423267200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2015,2,7]]},"DOI":"10.1145\/2716282.2716291","type":"proceedings-article","created":{"date-parts":[[2015,2,3]],"date-time":"2015-02-03T13:43:17Z","timestamp":1422970997000},"page":"36-47","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":24,"title":["Efficient utilization of GPGPU cache hierarchy"],"prefix":"10.1145","author":[{"given":"Mahmoud","family":"Khairy","sequence":"first","affiliation":[{"name":"Cairo University, Egypt"}]},{"given":"Mohamed","family":"Zahran","sequence":"additional","affiliation":[{"name":"New York University, USA"}]},{"given":"Amr G.","family":"Wassal","sequence":"additional","affiliation":[{"name":"Cairo University, Egypt"}]}],"member":"320","published-online":{"date-parts":[[2015,2,7]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AMD. AMD\u2019s Graphics Core Next Arhcitecure whitepaper.  AMD. AMD\u2019s Graphics Core Next Arhcitecure whitepaper."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2008.05.014"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SASP.2008.4570793"},{"key":"e_1_3_2_1_6_1","volume-title":"Hwu. Adaptive Cache Management for Energy-efficient GPU Computing. In Proceedings of the 47th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Chen X.","year":"2014"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2613908.2613909"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.36"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.43"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.12"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/263580.263599"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/InPar.2012.6339595"},{"issue":"27","key":"e_1_3_2_1_14_1","first-page":"10","volume":"9","author":"Gwennap L.","year":"2010","journal-title":"Microprocessor Report"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.1987.5009496"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/12.40842"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835938"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451158"},{"key":"e_1_3_2_1_19_1","first-page":"166","volume-title":"Proceedings of the 22nd international conference on Parallel architectures and compilation techniques","author":"Kay\u0131ran O."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"e_1_3_2_1_21_1","volume-title":"IEEE Proceedings-","author":"Kharbutli M.","year":"2004"},{"key":"e_1_3_2_1_22_1","volume-title":"Morgan Kaufmann","author":"Kirk D.","year":"2010"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.1982.1676020"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2012.6168947"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835937"},{"key":"e_1_3_2_1_26_1","unstructured":"D. Li. Orchestrating Thread Scheduling and Cache Management to Improve Memory System Throughput in Throughput Processors. PhD thesis The University Of Texas At Austin May 2014.  D. Li. Orchestrating Thread Scheduling and Cache Management to Improve Memory System Throughput in Throughput Processors. PhD thesis The University Of Texas At Austin May 2014."},{"key":"e_1_3_2_1_27_1","unstructured":"Mathworld. mathworld.wolfram.com\/IrreduciblePolynomial.html.  Mathworld. mathworld.wolfram.com\/IrreduciblePolynomial.html."},{"key":"e_1_3_2_1_28_1","volume-title":"Micro-benchmarking the C2070","author":"Meltzer R.","year":"2013"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2010.41"},{"key":"e_1_3_2_1_30_1","unstructured":"NVIDIA. CUDA C Programming Guide v5.5.  NVIDIA. CUDA C Programming Guide v5.5."},{"key":"e_1_3_2_1_31_1","unstructured":"NVIDIA. CUDA C\/C++ SDK Code Samples. http:\/\/developer.nvidia.com\/cuda-cc-sdk-codesamples.  NVIDIA. CUDA C\/C++ SDK Code Samples. http:\/\/developer.nvidia.com\/cuda-cc-sdk-codesamples."},{"key":"e_1_3_2_1_32_1","unstructured":"NVIDIA. NVIDIA Next Generation CUDA Compute Architecture: Kepler GK110.  NVIDIA. NVIDIA Next Generation CUDA Compute Architecture: Kepler GK110."},{"key":"e_1_3_2_1_33_1","unstructured":"OpenCL. The OpenCL Specification version 2.0. http:\/\/www.khronos.org.  OpenCL. The OpenCL Specification version 2.0. http:\/\/www.khronos.org."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2008.917757"},{"key":"e_1_3_2_1_35_1","volume-title":"GPU Performance Analysis and Optimization","author":"Micikevicius Paulius","year":"2012"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2005.52"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/115952.115961"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540718"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/165123.165152"},{"key":"e_1_3_2_1_41_1","unstructured":"G. S. Sohi. Logical data skewing schemes for interleaved memories in vector processors. 1988.  G. S. Sohi. Logical data skewing schemes for interleaved memories in vector processors. 1988."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/266800.266808"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.24"},{"key":"e_1_3_2_1_44_1","author":"Zheng Z.","year":"2014","journal-title":"Adaptive Cache and Concurrency Allocation on GPGPUs. Computer Architecture Letters"}],"event":{"name":"GPGPU-8: General-purpose Processing with Graphics Processing Units 8","location":"San Francisco CA USA","acronym":"GPGPU-8","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages"]},"container-title":["Proceedings of the 8th Workshop on General Purpose Processing using GPUs"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2716282.2716291","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2716282.2716291","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T07:00:42Z","timestamp":1750230042000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2716282.2716291"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,2,7]]},"references-count":44,"alternative-id":["10.1145\/2716282.2716291","10.1145\/2716282"],"URL":"https:\/\/doi.org\/10.1145\/2716282.2716291","relation":{},"subject":[],"published":{"date-parts":[[2015,2,7]]},"assertion":[{"value":"2015-02-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}