{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T06:34:33Z","timestamp":1767854073328,"version":"3.49.0"},"reference-count":28,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,2]]},"DOI":"10.1109\/hpca.2014.6835936","type":"proceedings-article","created":{"date-parts":[[2014,7,28]],"date-time":"2014-07-28T14:48:01Z","timestamp":1406558881000},"page":"248-259","source":"Crossref","is-referenced-by-count":23,"title":["A scalable multi-path microarchitecture for efficient GPU control flow"],"prefix":"10.1109","author":[{"given":"Ahmed","family":"ElTantawy","sequence":"first","affiliation":[]},{"given":"Jessica Wenjie","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Mike","family":"O'Connor","sequence":"additional","affiliation":[]},{"given":"Tor M.","family":"Aamodt","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"19","year":"0"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815992"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485964"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1145\/800031.808581"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"14","first-page":"1","article-title":"Convergence and scalarization for data- parallel architectures","author":"lee","year":"2013","journal-title":"Proc Int Symp Code Generation Optimization (CGO)"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.51"},{"key":"12","first-page":"88","article-title":"Characterizing and evaluating a key-value store application on heterogeneous CPU-GPU systems","author":"hetherington","year":"2012","journal-title":"Proc IEEE Symp on Perf Analysis of Systems and Software (ISPASS)"},{"key":"21","first-page":"235","article-title":"The dual-path execution model for efficient gpu control flow","author":"rhu","year":"2013","journal-title":"Proc IEEE Symp on High- Perf Computer Architecture (HPCA)"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1145\/2366231.2337167"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"23","author":"shah","year":"2010","journal-title":"FabMem A Multiported RAM and CAM Compiler for Superscalar Design Space Exploration"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1109\/MSE.2007.44"},{"key":"25","year":"2011"},{"key":"26","year":"2010","journal-title":"Intel HD Graphics OpenSource Programmer Reference Manual"},{"key":"27","author":"tsiodras","year":"2013","journal-title":"Real-time Raytracing Renderer"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522353"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1145\/2384616.2384625"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749714"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1118\/1.3231824"},{"key":"7","author":"coon","year":"2008","journal-title":"Tracking Register Usage during Multithreaded Processing Using A Scoreboard Having Separate Memory Regions and Storing Sequential Register Size Indicators"},{"key":"6","article-title":"Stack-less SIMT reconvergence at low cost","author":"collange","year":"2011","journal-title":"Technical Report hal-00622654 ARENAIRE - Inria Grenoble Rho?ne-Alpes \/ LIP Laboratoire de L'Informatique du Paralle?lisme"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2012.6237005"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.30"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155676"}],"event":{"name":"2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA)","location":"Orlando, FL, USA","start":{"date-parts":[[2014,2,15]]},"end":{"date-parts":[[2014,2,19]]}},"container-title":["2014 IEEE 20th International Symposium on High Performance Computer Architecture (HPCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6823235\/6835920\/06835936.pdf?arnumber=6835936","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,23]],"date-time":"2017-03-23T19:40:47Z","timestamp":1490298047000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6835936\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,2]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/hpca.2014.6835936","relation":{},"subject":[],"published":{"date-parts":[[2014,2]]}}}