{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T13:19:11Z","timestamp":1758892751118,"version":"3.37.3"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2017,2,1]],"date-time":"2017-02-01T00:00:00Z","timestamp":1485907200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61402285","61202026","61332001"],"award-info":[{"award-number":["61402285","61202026","61332001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2014T70418"],"award-info":[{"award-number":["2014T70418"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Program of the China National 1000 Young Talent Plan and Shanghai Science and Technology Committee","award":["15YF1406000"],"award-info":[{"award-number":["15YF1406000"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. VLSI Syst."],"published-print":{"date-parts":[[2017,2]]},"DOI":"10.1109\/tvlsi.2016.2584623","type":"journal-article","created":{"date-parts":[[2016,7,28]],"date-time":"2016-07-28T18:18:21Z","timestamp":1469729901000},"page":"520-533","source":"Crossref","is-referenced-by-count":7,"title":["Bank Stealing for a Compact and Efficient Register File Architecture in GPGPU"],"prefix":"10.1109","volume":"25","author":[{"given":"Naifeng","family":"Jing","sequence":"first","affiliation":[]},{"given":"Shunning","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Shuang","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Jingjie","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Li","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Chao","family":"Li","sequence":"additional","affiliation":[]},{"given":"Xiaoyao","family":"Liang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/1165573.1165633"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2001.991122"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540715"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2015.2417545"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485952"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522331"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2002.1176248"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485965"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.1999.765938"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541944"},{"year":"2012","key":"ref10","article-title":"Parallel thread execution ISA version 3.0"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/1596510.1596511"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2005.88"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2016604.2016608"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/859618.859627"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.16"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155656"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/782837.782839"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED.2013.6629258"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750417"},{"key":"ref19","first-page":"143","article-title":"Technology comparison for large last-level caches (L3Cs): Low-leakage SRAM, low write-energy STT-RAM, and refresh-optimized eDRAM","author":"chang","year":"2013","journal-title":"Proc 19th IEEE Int Symp High Perform Comput Archit (HPCA)"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669140"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000093"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485964"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522337"},{"key":"ref6","first-page":"824","article-title":"Architectural power models for SRAM and cam structures based on hybrid analytical\/empirical techniques","author":"liang","year":"2007","journal-title":"Proc Int Conf Comput -Aided Design (ICCAD)"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.40"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.18"},{"article-title":"Pseudo-dual port memory where ratio of first to second memory access is clock duty cycle independent","year":"2007","author":"jung","key":"ref8"},{"key":"ref7","first-page":"247","article-title":"SRAM-DRAM hybrid memory with applications to efficient register files in fine-grained multi-threading","author":"yu","year":"2011","journal-title":"2011 38th Annual International Symposium on Computer Architecture (ISCA) ISCA"},{"year":"2012","key":"ref2","article-title":"Nvidia&#x2019;s next generation CUDA compute architecture: Kepler GK110"},{"key":"ref9","first-page":"55","article-title":"Bank stealing for conflict mitigation in GPGPU register file","author":"jing","year":"2015","journal-title":"Proc IEEE\/ACM Int Symp Low Power Electron Design"},{"year":"2009","key":"ref1","article-title":"NVIDIA&#x2019;s next generation CUDA compute architecture: Fermi"},{"journal-title":"Single-Port Register-File User Guide","year":"2012","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD.2011.6105418"},{"key":"ref21","first-page":"87","article-title":"A \n$128\\times 128\\times 24$\n  Gb\/s crossbar interconnecting 128 tiles in a single hop and occupying 6% of their area","author":"passas","year":"2010","journal-title":"Proc 4th ACM\/IEEE Int Symp Netw -Chip"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.11"},{"journal-title":"NVIDIA Cuda Toolkit","year":"2013","key":"ref24"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835938"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"journal-title":"GPGPU-Sim 3 x Simulator","year":"2014","author":"aamodt","key":"ref26"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522351"},{"article-title":"Parboil: A revised benchmark suite for scientific and commercial throughput computing","year":"2012","author":"stratton","key":"ref25"}],"container-title":["IEEE Transactions on Very Large Scale Integration (VLSI) Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/92\/7827022\/07524807.pdf?arnumber=7524807","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:20:16Z","timestamp":1642004416000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7524807\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2]]},"references-count":43,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tvlsi.2016.2584623","relation":{},"ISSN":["1063-8210","1557-9999"],"issn-type":[{"type":"print","value":"1063-8210"},{"type":"electronic","value":"1557-9999"}],"subject":[],"published":{"date-parts":[[2017,2]]}}}