{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:17:25Z","timestamp":1777655845081,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2016,12,5]],"date-time":"2016-12-05T00:00:00Z","timestamp":1480896000000},"content-version":"vor","delay-in-days":366,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"ARM, Inc."},{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["PERFECT"],"award-info":[{"award-number":["PERFECT"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2015,12,5]]},"DOI":"10.1145\/2830772.2830830","type":"proceedings-article","created":{"date-parts":[[2016,1,11]],"date-time":"2016-01-11T08:38:13Z","timestamp":1452501493000},"page":"433-444","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":29,"title":["WarpPool"],"prefix":"10.1145","author":[{"given":"John","family":"Kloosterman","sequence":"first","affiliation":[{"name":"University of Michigan, Ann Arbor, MI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jonathan","family":"Beaumont","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mick","family":"Wollman","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ankit","family":"Sethia","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ron","family":"Dreslinski","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Trevor","family":"Mudge","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Scott","family":"Mahlke","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, MI"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2015,12,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-11970-5_14"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.11"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/178243.178259"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/InPar.2012.6339595"},{"key":"e_1_3_2_1_7_1","unstructured":"M. Harris. An efficient matrix transpose in cuda c\/c++. http:\/\/devblogs.nvidia.com\/parallelforall\/efficient-matrix-transpose-cuda-cc. Accessed: April 2015."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2014.6983054"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835938"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451158"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2508148.2485951"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/263580.263595"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.23"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835937"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1504176.1504194"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485964"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835955"},{"key":"e_1_3_2_1_18_1","unstructured":"Nvidia. GeForce GTX 480. http:\/\/www.geforce.com\/hardware\/desktop-gpus\/geforce-gtx-480\/specifications."},{"key":"e_1_3_2_1_19_1","unstructured":"Nvidia. GeForce GTX 680. http:\/\/www.geforce.com\/Active\/en_US\/en_US\/pdf\/GeForce-GTX-680-Whitepaper-FINAL.pdf."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/232973.232989"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/305138.305148"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/266800.266805"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540718"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056031"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.16"},{"key":"e_1_3_2_1_27_1","unstructured":"J. A. Stratton et al. Parboil: A revised benchmark suite for scientific and commercial throughput computing. Technical Report IMPACT-12-01 University of Illinois at Urbana-Champaign."},{"key":"e_1_3_2_1_28_1","unstructured":"S. Thoziyoor N. Muralimanohar and N. P. Jouppi. Cacti 5.0. HP Laboratories Technical Report 2007."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1806596.1806606"},{"key":"e_1_3_2_1_30_1","volume-title":"PP(99):1--1","author":"Zheng Z.","year":"2014","unstructured":"Z. Zheng, Z. Wang, and M. Lipasti. Adaptive cache and concurrency allocation on gpgpus. Computer Architecture Letters, PP(99):1--1, 2014."}],"event":{"name":"MICRO-48: The 48th Annual IEEE\/ACM International Symposium of Microarchitecture","location":"Waikiki Hawaii","acronym":"MICRO-48","sponsor":["IEEE Computer Society TC-uARCH","SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"]},"container-title":["Proceedings of the 48th International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2830772.2830830","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2830772.2830830","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2830772.2830830","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:29:59Z","timestamp":1763458199000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2830772.2830830"}},"subtitle":["sharing requests with inter-warp coalescing for throughput processors"],"short-title":[],"issued":{"date-parts":[[2015,12,5]]},"references-count":30,"alternative-id":["10.1145\/2830772.2830830","10.1145\/2830772"],"URL":"https:\/\/doi.org\/10.1145\/2830772.2830830","relation":{},"subject":[],"published":{"date-parts":[[2015,12,5]]},"assertion":[{"value":"2015-12-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}