{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T10:12:27Z","timestamp":1776420747356,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,5,31]],"date-time":"2016-05-31T00:00:00Z","timestamp":1464652800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Google India Private Limited"},{"name":"TCS"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,5,31]]},"DOI":"10.1145\/2907294.2907298","type":"proceedings-article","created":{"date-parts":[[2016,6,2]],"date-time":"2016-06-02T19:23:42Z","timestamp":1464895422000},"page":"203-214","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Improving GPU Performance Through Resource Sharing"],"prefix":"10.1145","author":[{"given":"Vishwesh","family":"Jatala","sequence":"first","affiliation":[{"name":"Indian Institute of Technology, Kanpur, Kanpur, India"}]},{"given":"Jayvant","family":"Anantpur","sequence":"additional","affiliation":[{"name":"Indian Institute of Science, Bangalore, Bangalore, India"}]},{"given":"Amey","family":"Karkare","sequence":"additional","affiliation":[{"name":"Indian Institute of Technology, Kanpur, Kanpur, India"}]}],"member":"320","published-online":{"date-parts":[[2016,5,31]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"CUDA C Programming Guide. https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/.  CUDA C Programming Guide. https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/."},{"key":"e_1_3_2_1_2_1","unstructured":"CUDA-SDK. http:\/\/docs.nvidia.com\/cuda\/cuda-samples.  CUDA-SDK. http:\/\/docs.nvidia.com\/cuda\/cuda-samples."},{"key":"e_1_3_2_1_3_1","unstructured":"GPGPU-Sim. http:\/\/www.gpgpu-sim.org.  GPGPU-Sim. http:\/\/www.gpgpu-sim.org."},{"key":"e_1_3_2_1_4_1","unstructured":"Parboil Benchmarks. http:\/\/impact.crhc.illinois.edu\/Parboil\/parboil.aspx.  Parboil Benchmarks. http:\/\/impact.crhc.illinois.edu\/Parboil\/parboil.aspx."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522337"},{"key":"e_1_3_2_1_6_1","volume-title":"CC","author":"Anantpur J.","year":"2014","unstructured":"J. Anantpur and R. Govindarajan . Taming Control Divergence in GPUs through Control Flow Linearization . In CC , 2014 . J. Anantpur and R. Govindarajan. Taming Control Divergence in GPUs through Control Flow Linearization. In CC, 2014."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_8_1","volume-title":"ISCA","author":"Brunie N.","year":"2012","unstructured":"N. Brunie , S. Collange , and G. Diamos . Simultaneous Branch and Warp Interweaving for Sustained GPU Performance . In ISCA , 2012 . N. Brunie, S. Collange, and G. Diamos. Simultaneous Branch and Warp Interweaving for Sustained GPU Performance. In ISCA, 2012."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155676"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/2014698.2014893"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.12"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2166879.2166882"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.18"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1964179.1964184"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"V. Jatala J. Anantpur and A. Karkare. Improving GPU Performance Through Resource Sharing. CoRR http:\/\/arxiv.org\/abs\/1503.05694 2015.  V. Jatala J. Anantpur and A. Karkare. Improving GPU Performance Through Resource Sharing. CoRR http:\/\/arxiv.org\/abs\/1503.05694 2015.","DOI":"10.1145\/2907294.2907298"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830784"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451158"},{"key":"e_1_3_2_1_19_1","volume-title":"PACT","author":"Kayiran O.","year":"2013","unstructured":"O. Kayiran , A. Jog , M. Kandemir , and C. Das . Neither more nor less: Optimizing thread-level parallelism for GPGPUs . In PACT , 2013 . O. Kayiran, A. Jog, M. Kandemir, and C. Das. Neither more nor less: Optimizing thread-level parallelism for GPGPUs. In PACT, 2013."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835937"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750418"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628107"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056024"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2011.88"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1854273.1854348"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815992"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"e_1_3_2_1_28_1","volume-title":"ISCA","author":"Rhu M.","year":"2012","unstructured":"M. Rhu and M. Erez . CAPRI: Prediction of Compaction-adequacy for Handling Control-divergence in GPGPU Architectures . In ISCA , 2012 . M. Rhu and M. Erez. CAPRI: Prediction of Compaction-adequacy for Handling Control-divergence in GPGPU Architectures. In ISCA, 2012."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056031"},{"key":"e_1_3_2_1_31_1","volume-title":"On demand register allocation and deallocation for a multithreaded processor","author":"Tarjan D.","year":"2011","unstructured":"D. Tarjan and K. Skadron . On demand register allocation and deallocation for a multithreaded processor , 2011 . US Patent App . 12\/649,238. D. Tarjan and K. Skadron. On demand register allocation and deallocation for a multithreaded processor, 2011. US Patent App. 12\/649,238."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835939"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830813"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2370816.2370858"}],"event":{"name":"HPDC'16: The 25th International Symposium on High-Performance Parallel and Distributed Computing","location":"Kyoto Japan","acronym":"HPDC'16","sponsor":["University of Arizona University of Arizona","SIGARCH ACM Special Interest Group on Computer Architecture","SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the 25th ACM International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2907294.2907298","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2907294.2907298","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:54:25Z","timestamp":1750222465000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2907294.2907298"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,5,31]]},"references-count":34,"alternative-id":["10.1145\/2907294.2907298","10.1145\/2907294"],"URL":"https:\/\/doi.org\/10.1145\/2907294.2907298","relation":{},"subject":[],"published":{"date-parts":[[2016,5,31]]},"assertion":[{"value":"2016-05-31","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}