{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T10:50:55Z","timestamp":1769511055077,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2013,12,7]],"date-time":"2013-12-07T00:00:00Z","timestamp":1386374400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007065","name":"Nvidia","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100007065","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2013,12,7]]},"DOI":"10.1145\/2540708.2540718","type":"proceedings-article","created":{"date-parts":[[2013,12,17]],"date-time":"2013-12-17T13:36:21Z","timestamp":1387287381000},"page":"99-110","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":119,"title":["Divergence-aware warp scheduling"],"prefix":"10.1145","author":[{"given":"Timothy G.","family":"Rogers","sequence":"first","affiliation":[{"name":"University of British Columbia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mike","family":"O'Connor","sequence":"additional","affiliation":[{"name":"NVIDIA Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tor M.","family":"Aamodt","sequence":"additional","affiliation":[{"name":"University of British Columbia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2013,12,7]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"NVIDIA CUDA C Programming Guide v 4.2 2012.  NVIDIA CUDA C Programming Guide v 4.2 2012."},{"key":"e_1_3_2_1_2_1","unstructured":"T. M. Aamodt etal GPGPU-Sim 3.x Manual. http:\/\/gpgpu-sim.org\/manual\/index.php5\/GPGPU-Sim_3.x_Manual 2012.  T. M. Aamodt et al. GPGPU-Sim 3.x Manual. http:\/\/gpgpu-sim.org\/manual\/index.php5\/GPGPU-Sim_3.x_Manual 2012."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1837855.1806653"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611971538"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654078"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.39"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735702"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2024723.2000108"},{"key":"e_1_3_2_1_11_1","first-page":"25","volume-title":"Thread Block Compaction for Efficient SIMT Control Flow. In HPCA","author":"Fung W.","year":"2011","unstructured":"W. Fung and T. Aamodt . Thread Block Compaction for Efficient SIMT Control Flow. In HPCA 2011 , pages 25 -- 36 . W. Fung and T. Aamodt. Thread Block Compaction for Efficient SIMT Control Flow. In HPCA 2011, pages 25--36."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.12"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2024723.2000093"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2009.4"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2012.6189209"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2038037.1941590"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2189750.2151003"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1816038.1815971"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304582"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499368.2451158"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2508148.2485951"},{"key":"e_1_3_2_1_22_1","volume-title":"Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs. In PACT","author":"Kayiran O.","year":"2013","unstructured":"O. Kayiran Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs. In PACT 2013 . O. Kayiran et al. Neither More Nor Less: Optimizing Thread-level Parallelism for GPGPUs. In PACT 2013."},{"key":"e_1_3_2_1_23_1","unstructured":"Khronos Group. OpenCL. http:\/\/www.khronos.org\/opencl\/.  Khronos Group. OpenCL. http:\/\/www.khronos.org\/opencl\/."},{"key":"e_1_3_2_1_24_1","volume-title":"Effect of Instruction Fetch and Memory Scheduling on GPU Performance. In Workshop on Language, Compiler, and Architecture Support for GPGPU","author":"Lakshminarayana N. B.","year":"2010","unstructured":"N. B. Lakshminarayana and H. Kim . Effect of Instruction Fetch and Memory Scheduling on GPU Performance. In Workshop on Language, Compiler, and Architecture Support for GPGPU , 2010 . N. B. Lakshminarayana and H. Kim. Effect of Instruction Fetch and Memory Scheduling on GPU Performance. In Workshop on Language, Compiler, and Architecture Support for GPGPU, 2010."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.44"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2508148.2485964"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2008.31"},{"key":"e_1_3_2_1_28_1","volume-title":"ISPASS","author":"Maas M.","year":"2013","unstructured":"M. Maas How a Single Chip Causes Massive Power Bills GPUSimPow: A GPGPU Power Simulator . In ISPASS 2013 . M. Maas et al. How a Single Chip Causes Massive Power Bills GPUSimPow: A GPGPU Power Simulator. In ISPASS 2013."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1816038.1815992"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1273440.1250709"},{"key":"e_1_3_2_1_32_1","unstructured":"T. G. Rogers. CCWS Simulation Infrastructure. http:\/\/www.ece.ubc.ca\/~tgrogers\/ccws.html 2013.  T. G. Rogers. CCWS Simulation Infrastructure. http:\/\/www.ece.ubc.ca\/~tgrogers\/ccws.html 2013."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_34_1","volume-title":"Application Accelerators in High Performance Computing","author":"Rul S.","year":"2010","unstructured":"S. Rul An Experimental Study on Performance Portability of OpenCL Kernels . In Application Accelerators in High Performance Computing , 2010 . S. Rul et al. An Experimental Study on Performance Portability of OpenCL Kernels. In Application Accelerators in High Performance Computing, 2010."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/225830.224451"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/1064979.1064989"},{"issue":"5","key":"e_1_3_2_1_37_1","first-page":"677","article-title":"CACTI: An Enhanced Cache Access and Cycle Time Model. Solid-State Circuits","volume":"31","author":"Wilton S.","year":"1996","unstructured":"S. Wilton and N. Jouppi . CACTI: An Enhanced Cache Access and Cycle Time Model. Solid-State Circuits , IEEE Journal of , 31 ( 5 ): 677 -- 688 , May 1996 . S. Wilton and N. Jouppi. CACTI: An Enhanced Cache Access and Cycle Time Model. Solid-State Circuits, IEEE Journal of, 31(5):677--688, May 1996.","journal-title":"IEEE Journal of"}],"event":{"name":"MICRO-46: The 46th Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Davis California","acronym":"MICRO-46","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing","IEEE CS"]},"container-title":["Proceedings of the 46th Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2540708.2540718","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2540708.2540718","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T08:10:33Z","timestamp":1750234233000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2540708.2540718"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,12,7]]},"references-count":37,"alternative-id":["10.1145\/2540708.2540718","10.1145\/2540708"],"URL":"https:\/\/doi.org\/10.1145\/2540708.2540718","relation":{},"subject":[],"published":{"date-parts":[[2013,12,7]]},"assertion":[{"value":"2013-12-07","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}