{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T03:46:22Z","timestamp":1772163982861,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2013,6,23]],"date-time":"2013-06-23T00:00:00Z","timestamp":1371945600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2013,6,23]]},"DOI":"10.1145\/2485922.2485954","type":"proceedings-article","created":{"date-parts":[[2013,6,25]],"date-time":"2013-06-25T15:13:21Z","timestamp":1372173201000},"page":"368-379","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":22,"title":["SIMD divergence optimization through intra-warp compaction"],"prefix":"10.1145","author":[{"given":"Aniruddha S.","family":"Vaidya","sequence":"first","affiliation":[{"name":"Intel Corporation, Santa Clara, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anahita","family":"Shayesteh","sequence":"additional","affiliation":[{"name":"Intel Corporation, Santa Clara, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong Hyuk","family":"Woo","sequence":"additional","affiliation":[{"name":"Intel Corporation, Santa Clara, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roy","family":"Saharoy","sequence":"additional","affiliation":[{"name":"Intel Corporation, Santa Clara, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mani","family":"Azimi","sequence":"additional","affiliation":[{"name":"Intel Corporation, Santa Clara, CA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2013,6,23]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AMD Radeon HD 7970 Graphics AMD. {Online}. Available: amd.com AMD Radeon HD 7970 Graphics AMD. {Online}. Available: amd.com"},{"key":"e_1_3_2_1_2_1","volume-title":"UC Berkeley","author":"Asanovic K.","year":"1998"},{"key":"e_1_3_2_1_3_1","volume-title":"Analyzing CUDA workloads using a detailed GPU simulator,\" in Proceedings of International Symposium on Performance Analsys of Systems and Software","author":"Bakhoda A.","year":"2009"},{"key":"e_1_3_2_1_4_1","volume-title":"MIT","author":"Batten C. F.","year":"2010"},{"key":"e_1_3_2_1_5_1","first-page":"49","author":"Brunie N.","year":"2012","journal-title":"\"Simultaneous branch and warp interweaving for sustained GPU performance,\" in Proceedings of International Symposium on Computer Architecture"},{"key":"e_1_3_2_1_6_1","unstructured":"ILLIAC IV -- System Description Burroughs Corp 1974 Computer History Museum resource. ILLIAC IV -- System Description Burroughs Corp 1974 Computer History Museum resource."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2010.5650274"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155676"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/548716.822690"},{"key":"e_1_3_2_1_11_1","first-page":"25","volume-title":"Thread block compaction for efficient simt control flow,\" in International Symposium on High Performance Computer Architecture","author":"Fung W.","year":"2011"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.12"},{"key":"e_1_3_2_1_13_1","volume-title":"Intel next generation microarchitecture code name IvyBridge,\" in Intel Developer Forum","author":"George V.","year":"2012"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/1964179.1964184"},{"key":"e_1_3_2_1_15_1","volume-title":"GPU Computing Gems --- Jade and Emerald Eds. Morgan Kaufmann","author":"Hwu W.","year":"2011"},{"key":"e_1_3_2_1_16_1","volume-title":"Intel Corp","author":"Intel Processor Graphics X Developer's Guide","year":"2012"},{"key":"e_1_3_2_1_17_1","volume-title":"Intel Corp","author":"Graphics Programmer's Reference Intel Open","year":"2012"},{"key":"e_1_3_2_1_18_1","volume-title":"Intel Corp","author":"Applications Intel SDK","year":"2012"},{"key":"e_1_3_2_1_19_1","unstructured":"D. Kanter \"Intel's IvyBridge graphics architecture.\" {Online}. Available: realworldtech.com\/ivy-bridge-gpu\/  D. Kanter \"Intel's IvyBridge graphics architecture.\" {Online}. Available: realworldtech.com\/ivy-bridge-gpu\/"},{"key":"e_1_3_2_1_20_1","unstructured":"OpenCL - The open standard for parallel programming of heterogeneous systems The Khronos Group. {Online}. Available: khronos.org\/opencl\/ OpenCL - The open standard for parallel programming of heterogeneous systems The Khronos Group. {Online}. Available: khronos.org\/opencl\/"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000080"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/964965.808581"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815992"},{"key":"e_1_3_2_1_24_1","unstructured":"Compute Shader Overview Microsoft Corp. {Online}. Available: msdn.microsoft.com\/en-us\/library\/ff476331.aspx Compute Shader Overview Microsoft Corp. {Online}. Available: msdn.microsoft.com\/en-us\/library\/ff476331.aspx"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"e_1_3_2_1_26_1","volume-title":"Nvidia Corp","author":"Brief Technical","year":"2006"},{"key":"e_1_3_2_1_27_1","volume-title":"Nvidia Corp","author":"Programming Guide NVIDIA CUDA C","year":"2012"},{"key":"e_1_3_2_1_28_1","volume-title":"Nvidia Corp","author":"Compute Architecture A's Next","year":"2012"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2008.917757"},{"key":"e_1_3_2_1_30_1","first-page":"61","author":"Rhu M.","year":"2012","journal-title":"\"CAPRI: prediction of compaction-adequacy for handling control-divergence in GPGPU architectures,\" in Proceedings of International Symposium on Computer Architecture"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2006.74"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/339647.339693"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2018323.2018331"},{"key":"e_1_3_2_1_34_1","volume-title":"May","author":"Woligroski D.","year":"2012"}],"event":{"name":"ISCA'13: The 40th Annual International Symposium on Computer Architecture","location":"Tel-Aviv Israel","acronym":"ISCA'13","sponsor":["IEEE CS","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 40th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2485922.2485954","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2485922.2485954","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:48:43Z","timestamp":1750222123000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2485922.2485954"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,6,23]]},"references-count":34,"alternative-id":["10.1145\/2485922.2485954","10.1145\/2485922"],"URL":"https:\/\/doi.org\/10.1145\/2485922.2485954","relation":{"is-identical-to":[{"id-type":"doi","id":"10.1145\/2508148.2485954","asserted-by":"object"}]},"subject":[],"published":{"date-parts":[[2013,6,23]]},"assertion":[{"value":"2013-06-23","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}