{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T21:52:16Z","timestamp":1768341136659,"version":"3.49.0"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1109\/nas51552.2021.9605411","type":"proceedings-article","created":{"date-parts":[[2021,11,22]],"date-time":"2021-11-22T21:12:15Z","timestamp":1637615535000},"page":"1-8","source":"Crossref","is-referenced-by-count":6,"title":["LocalityGuru: A PTX Analyzer for Extracting Thread Block-level Locality in GPGPUs"],"prefix":"10.1109","author":[{"given":"Devashree","family":"Tripathy","sequence":"first","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]},{"given":"AmirAli","family":"Abdolrashidi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Quan","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Daniel","family":"Wong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]},{"given":"Manoranjan","family":"Satpathy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocab":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2012.71"},{"key":"ref38","article-title":"Mapa: Multi-accelerator pattern allocation policy for multi-tenant gpu servers","author":"ranganath","year":"2021","journal-title":"SC14 International Conference for High Performance Computing Networking Storage and Analysis SC"},{"key":"ref33","article-title":"CUDA Toolkit","year":"2007"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/1964179.1964192"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2004.1281665"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2019.2933842"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807611"},{"key":"ref35","article-title":"Polybench: The polyhedral benchmark suite","author":"pouchet","year":"2012"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2989081.2989102"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-41321-1_7"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/360128.360134"},{"key":"ref12","first-page":"0","article-title":"The design and verification of the alphastation 600 5-series workstation","volume":"7","author":"zurawski","year":"1995","journal-title":"Digital Technical Journal"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2019.00072"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807629"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1188455.1188543"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1177\/1094342007078442"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1103845.1094852"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00074"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3232521"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123976"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3370748.3406553"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751213"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3330373"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2018.00024"},{"key":"ref29","first-page":"1","article-title":"Maps: Optimizing massively parallel applications using device-level memory abstraction","volume":"11","author":"rubin","year":"2014","journal-title":"ACM Transactions on Architecture and Code Optimization (TACO)"},{"key":"ref5","first-page":"73","article-title":"Apogee: Adaptive prefetching on gpus for energy efficiency","author":"sethia","year":"2013","journal-title":"Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00034"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751239"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3370748.3406577"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3451164"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2020.3023723"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3033019.3033022"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414641"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/1809028.1806606"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3406538"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/1375527.1375562"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00086"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2015.7054184"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2017.2693371"},{"key":"ref41","first-page":"172","article-title":"Hierarchical place trees: A portable abstraction for task parallelism and data movement","author":"yan","year":"2009","journal-title":"International Workshop on Languages and Compilers for Parallel Computing"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.57"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/509705.509708"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3093337.3037709"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/HIPC.2010.5713187"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835937"}],"event":{"name":"2021 IEEE International Conference on Networking, Architecture and Storage (NAS)","location":"Riverside, CA, USA","start":{"date-parts":[[2021,10,24]]},"end":{"date-parts":[[2021,10,26]]}},"container-title":["2021 IEEE International Conference on Networking, Architecture and Storage (NAS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9605358\/9605359\/09605411.pdf?arnumber=9605411","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:51:26Z","timestamp":1652201486000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9605411\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/nas51552.2021.9605411","relation":{},"subject":[],"published":{"date-parts":[[2021,10]]}}}