{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T11:23:16Z","timestamp":1730287396986,"version":"3.28.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,10]],"date-time":"2021-12-10T00:00:00Z","timestamp":1639094400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,10]],"date-time":"2021-12-10T00:00:00Z","timestamp":1639094400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,10]]},"DOI":"10.1109\/paap54281.2021.9720477","type":"proceedings-article","created":{"date-parts":[[2022,3,4]],"date-time":"2022-03-04T20:26:05Z","timestamp":1646425565000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["Reducing TLB Miss Penalty on GPUs via Unified Multi-level PWB and PWC"],"prefix":"10.1109","author":[{"given":"Yang","family":"Lin","sequence":"first","affiliation":[{"name":"School of Computer NUDT,Changsha,China"}]},{"given":"Dunbo","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer NUDT,Changsha,China"}]},{"given":"Chaoyang","family":"Jia","sequence":"additional","affiliation":[{"name":"School of Computer NUDT,Changsha,China"}]},{"given":"Qiong","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer NUDT,Changsha,China"}]},{"given":"Li","family":"Shen","sequence":"additional","affiliation":[{"name":"School of Computer NUDT,Changsha,China"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835964"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2644865.2541942"},{"key":"ref12","article-title":"Supporting x86-64 address translation for 100s of GPU lanes","author":"power","year":"0","journal-title":"IEEE International Symposium on High-Performance Comp Architecture"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751239"},{"journal-title":"Mitigating gpu memory divergence for data-intensive applications","year":"2015","author":"wang","key":"ref14"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522715"},{"key":"ref16","first-page":"180","author":"shin","year":"2018","journal-title":"Scheduling Page Table Walks for Irregular GPU Applications (ISCA '18)"},{"key":"ref17","first-page":"352","author":"shin","year":"2018","journal-title":"Neighborhood-Aware Address Translation for Irregular GPU Applications"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654082"},{"key":"ref4","first-page":"307","article-title":"SpecTLB: A mechanism for speculative address translation","author":"barr","year":"2011","journal-title":"2011 38th Annual International Symposium on Computer Architecture (ISCA) ISCA"},{"key":"ref27","first-page":"45:1","article-title":"Improving the Efficiency of GPGPU Work-Queue Through Data Awareness","volume":"14","author":"zhang","year":"2017","journal-title":"ACM Trans Archit Code Optim"},{"journal-title":"Nvidia's Next Generation CUDA Compute Architecture Fermi","year":"2009","author":"nvidia","key":"ref3"},{"key":"ref6","first-page":"433","author":"esteve","year":"2014","journal-title":"Exploiting Parallelization on Address Translation Shared Page Walk Cache[J]"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2012.6402918"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540718"},{"key":"ref7","first-page":"637","author":"haria","year":"2018","journal-title":"Devirtualizing Memory in Heterogeneous Systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2012.2232647"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"ref1","first-page":"136","author":"ausavarungnirun","year":"2017","journal-title":"Mosaic A GPU Memory Manager with Application-Transparent Support for Multiple Page Sizes"},{"key":"ref20","first-page":"161","author":"vesely","year":"2016","journal-title":"Observations and opportunities in architecting shared virtual memory for heterogeneous systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1816038.1815992"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2014.16"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815970"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-018-7386-4"},{"journal-title":"PolyBench The First Benchmark for Polystores[M]","year":"2018","author":"karimov","key":"ref25"}],"event":{"name":"2021 12th International Symposium on Parallel Architectures, Algorithms and Programming (PAAP)","start":{"date-parts":[[2021,12,10]]},"location":"Xi'an, China","end":{"date-parts":[[2021,12,12]]}},"container-title":["2021 12th International Symposium on Parallel Architectures, Algorithms and Programming (PAAP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9720298\/9720304\/09720477.pdf?arnumber=9720477","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T21:14:02Z","timestamp":1655759642000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9720477\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,10]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/paap54281.2021.9720477","relation":{},"subject":[],"published":{"date-parts":[[2021,12,10]]}}}