{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T17:35:26Z","timestamp":1775842526163,"version":"3.50.1"},"reference-count":37,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/100001641","name":"GRF","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100001641","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001747","name":"HKBU","doi-asserted-by":"publisher","award":["210412"],"award-info":[{"award-number":["210412"]}],"id":[{"id":"10.13039\/501100001747","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001747","name":"HKBU","doi-asserted-by":"publisher","award":["FRG2\/14-15\/059"],"award-info":[{"award-number":["FRG2\/14-15\/059"]}],"id":[{"id":"10.13039\/501100001747","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shenzhen Basic Research","award":["SCI-2015-SZTIC-002"],"award-info":[{"award-number":["SCI-2015-SZTIC-002"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2017,1,1]]},"DOI":"10.1109\/tpds.2016.2549523","type":"journal-article","created":{"date-parts":[[2016,3,31]],"date-time":"2016-03-31T18:08:50Z","timestamp":1459447730000},"page":"72-86","source":"Crossref","is-referenced-by-count":148,"title":["Dissecting GPU Memory Hierarchy Through Microbenchmarking"],"prefix":"10.1109","volume":"28","author":[{"given":"Xinxin","family":"Mei","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9745-4372","authenticated-orcid":false,"given":"Xiaowen","family":"Chu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"279","article-title":"lmbench: Portable tools for performance analysis","author":"mcvoy","year":"0","journal-title":"Proc USENIX Annu Tech Conf"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751237"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/1854273.1854336"},{"key":"ref30","first-page":"13:1","article-title":"Dymaxion:\n Optimizing memory access patterns for heterogeneous systems","author":"che","year":"0","journal-title":"Proc Int Conf High Perform Comput Netw Storage Anal"},{"key":"ref37","article-title":"GPU performance analysis and optimization","author":"micikevicius","year":"2012"},{"key":"ref36","article-title":"Better performance at lower occupancy","author":"volkov","year":"2010"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/384286.264152"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-89740-8_13"},{"key":"ref10","article-title":"matrixMulCUBLAS","year":"0","journal-title":"CUDA SDK 6 5"},{"key":"ref11","year":"0","journal-title":"Fermi Whitepaper"},{"key":"ref12","year":"0","journal-title":"Kepler GK110 Whitepaper"},{"key":"ref13","year":"0","journal-title":"Tuning CUDA Applications for Kepler"},{"key":"ref14","year":"0","journal-title":"Tuning CUDA applications for Maxwell"},{"key":"ref15","year":"0","journal-title":"CUDA C Program Guide - v7 5"},{"key":"ref16","year":"0","journal-title":"CUDA C Best Practices Guide"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2008.5214359"},{"key":"ref18","article-title":"Micro-benchmarking the GT200 GPU","author":"papadopoulou","year":"0"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2010.5452013"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD.2013.6691165"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btu047"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2304576.2304582"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/1345206.1345220"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2010.107"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-15-121"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/1513895.1513905"},{"key":"ref7","year":"0","journal-title":"NVIDIA GeForce GTX 980 Whitepaper"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2014.02.005"},{"key":"ref9","article-title":"matrixMul","year":"0","journal-title":"CUDA SDK 6 5"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2010.41"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2011.5749745"},{"key":"ref22","article-title":"Micro-benchmarking the C2070","author":"meltzer","year":"2013"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/2370036.2145820"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/12.467697"},{"key":"ref23","article-title":"CPU performance evaluation and execution time prediction using\n Narrow spectrum benchmarking","author":"saavedra","year":"1992"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/SAMOS.2014.6893202"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44917-2_13"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/7779224\/07445236.pdf?arnumber=7445236","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:12:42Z","timestamp":1642003962000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7445236\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,1,1]]},"references-count":37,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2016.2549523","relation":{},"ISSN":["1045-9219"],"issn-type":[{"value":"1045-9219","type":"print"}],"subject":[],"published":{"date-parts":[[2017,1,1]]}}}