{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,15]],"date-time":"2025-12-15T19:33:30Z","timestamp":1765827210762,"version":"3.28.0"},"reference-count":38,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014,6]]},"DOI":"10.1109\/isca.2014.6853209","type":"proceedings-article","created":{"date-parts":[[2014,7,29]],"date-time":"2014-07-29T15:19:17Z","timestamp":1406647157000},"page":"181-192","source":"Crossref","is-referenced-by-count":17,"title":["Fine-grain task aggregation and coordination on GPUs"],"prefix":"10.1109","author":[{"given":"Marc S.","family":"Orr","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bradford M.","family":"Beckmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Steven K.","family":"Reinhardt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David A.","family":"Wood","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1007\/3-540-45937-5_14","article-title":"StreamIt: A lan-guage for streaming applications","author":"thies","year":"2002","journal-title":"Proceedings of the 11th Inter-national Conference on Compiler Construction"},{"key":"35","doi-asserted-by":"publisher","DOI":"10.1145\/1572769.1572792"},{"journal-title":"Optimizing Parallel Reduction in CUDA","year":"0","author":"harris","key":"17"},{"key":"36","first-page":"29","article-title":"Task management for irregu-lar-parallel workloads on the GPU","author":"tzeng","year":"2010","journal-title":"Proceedings of the Confer-ence on High Performance Graphics"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1145\/1327452.1327492"},{"key":"33","doi-asserted-by":"publisher","DOI":"10.1145\/1572769.1572797"},{"key":"15","first-page":"117","article-title":"An optimistic approach to lock-free FIFO queues","author":"ladan-mozes","year":"2004","journal-title":"Proceedings of the 18th International Sympo-sium on Distributed Computing"},{"key":"34","doi-asserted-by":"publisher","DOI":"10.1145\/2366145.2366180"},{"journal-title":"HSA Programmers Reference Manual HSAIL Virtual ISA and Programming Model","year":"2013","key":"16"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1145\/69624.357206"},{"key":"14","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1006\/jpdc.1998.1446","article-title":"Nonblocking algorithms and preemption-safe locking on multiprogrammed shared memory mul-tiprocessors","volume":"51","author":"michael","year":"1998","journal-title":"J Parallel Distrib Comput"},{"key":"37","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1145\/2155620.2155655","article-title":"Hard-ware transactional memory for GPU Architectures","author":"fung","year":"2011","journal-title":"Proceedings of the 44th Annual IEEE\/ACM International Symposium on Microar-chitecture"},{"key":"11","first-page":"64","article-title":"Implementing lock-free queues","author":"valois","year":"1994","journal-title":"Proceedings of the Seventh International Conference on Parallel and Distributed Com-puting Systems"},{"key":"38","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485935"},{"journal-title":"A library of concurrent objects and their proofs of correctness","year":"1990","author":"gong","key":"12"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.1145\/277650.277725"},{"key":"20","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1145\/1477926.1477930","article-title":"GRAMPS: A programming model for graphics pipelines","volume":"28","author":"sugerman","year":"2009","journal-title":"ACM Trans Graph"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470427"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5161079"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1145\/2024716.2024718"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2007.43"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485940"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835930"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1109\/SFCS.1986.52"},{"journal-title":"AMD Accelerated Parallel Processing SDK","year":"0","key":"29"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2011.5746311"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2011.5746314"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.1998.658762"},{"key":"10","article-title":"Hierarchical work stealing on manycore clusters","author":"min","year":"2011","journal-title":"Fifth Conference on Partitioned Global Ad-dress Space Programming Models"},{"key":"30","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"journal-title":"OpenCL 2 0 Reference Pages","year":"0","key":"7"},{"journal-title":"CUDA C Programming Guide","year":"0","key":"6"},{"key":"32","first-page":"237","article-title":"Improving SIMT efficiency of global rendering algorithms with architectural support for dynamic mi-cro-kernels","author":"steffen","year":"2010","journal-title":"Proceedings of the 43rd Annual IEEE\/ACM Interna-tional Symposium on Microarchitecture"},{"key":"5","article-title":"Heterogeneous system architecture: A technical re-view","author":"kyriazis","year":"2012","journal-title":"AMD Aug"},{"journal-title":"Bringing High-end Graphics to Handheld Devices","year":"2011","key":"4"},{"key":"31","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2013.24"},{"journal-title":"Intel Threading Building Blocks","year":"0","key":"9"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2012.257"}],"event":{"name":"2014 ACM\/IEEE 41st International Symposium on Computer Architecture (ISCA)","start":{"date-parts":[[2014,6,14]]},"location":"Minneapolis, MN, USA","end":{"date-parts":[[2014,6,18]]}},"container-title":["2014 ACM\/IEEE 41st International Symposium on Computer Architecture (ISCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6847316\/6853187\/06853209.pdf?arnumber=6853209","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,13]],"date-time":"2019-08-13T05:19:19Z","timestamp":1565673559000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6853209\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,6]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/isca.2014.6853209","relation":{},"subject":[],"published":{"date-parts":[[2014,6]]}}}