{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T13:12:23Z","timestamp":1767705143007,"version":"3.41.0"},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. Comput. Sci. Technol."],"published-print":{"date-parts":[[2016,1]]},"DOI":"10.1007\/s11390-016-1610-1","type":"journal-article","created":{"date-parts":[[2016,1,7]],"date-time":"2016-01-07T16:56:00Z","timestamp":1452185760000},"page":"36-49","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Performance-Centric Optimization for Racetrack Memory Based Register File on GPUs"],"prefix":"10.1007","volume":"31","author":[{"given":"Yun","family":"Liang","sequence":"first","affiliation":[]},{"given":"Shuo","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,1,8]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Gebhart M, Keckler S W, Khailany B, Krashinsky R, Dally W J. Unifying primary cache, scratch, and register file memories in a throughput processor. In Proc. the 45th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), Dec. 2012, pp.96-106.","key":"1610_CR1","DOI":"10.1109\/MICRO.2012.18"},{"doi-asserted-by":"crossref","unstructured":"Li X, Liang Y. Energy-efficient kernel management on gpus. In Proc. the Design Automation and Test in Europe (DATE), Mar. 2016.","key":"1610_CR2","DOI":"10.3850\/9783981537079_0647"},{"doi-asserted-by":"crossref","unstructured":"Liang Y, Huynh H, Rupnow K, Goh R, Chen D. Efficient GPU spatial-temporal multitasking. IEEE Transactions on Parallel and Distributed Systems, 2015, 26(3): 748\u2013760.","key":"1610_CR3","DOI":"10.1109\/TPDS.2014.2313342"},{"doi-asserted-by":"crossref","unstructured":"Liang Y, Xie X, Sun G, Chen D. An efficient compiler framework for cache bypassing on GPUs. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, 2015, 34(10): 1677\u20131690.","key":"1610_CR4","DOI":"10.1109\/TCAD.2015.2424962"},{"doi-asserted-by":"crossref","unstructured":"Xie X, Liang Y, Li X, Wu Y, Sun G, Wang T, Fan D. Enabling coordinated register allocation and thread-level parallelism optimization for GPUs. In Proc. the 48th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), Dec. 2015.","key":"1610_CR5","DOI":"10.1145\/2830772.2830813"},{"doi-asserted-by":"crossref","unstructured":"Xie X, Liang Y, Sun G, Chen D. An efficient compiler framework for cache bypassing on GPUs. In Proc. the International Conference on Computer Aided Design (ICCAD), Nov. 2013, pp.516-523.","key":"1610_CR6","DOI":"10.1109\/ICCAD.2013.6691165"},{"doi-asserted-by":"crossref","unstructured":"Xie X, Liang Y, Wang Y, Sun G, Wang T. Coordinated static and dynamic cache bypassing on GPUs. In Proc. the 21st IEEE International Symposium on High Performance Computer Architecture (HPCA), Feb. 2015, pp.76-88.","key":"1610_CR7","DOI":"10.1109\/HPCA.2015.7056023"},{"doi-asserted-by":"crossref","unstructured":"Mao M,Wen W, Zhang Y, Chen Y, Li H H. Exploration of GPGPU register file architecture using domain-wall-shiftwrite based racetrack memory. In Proc. the 51st Annual Design Automation Conference (DAC), June 2014, pp.196:1-196:6.","key":"1610_CR8","DOI":"10.1145\/2593069.2593137"},{"doi-asserted-by":"crossref","unstructured":"Zhang C, Sun G, Zhang W, Mi F, Li H, Zhao W. Quantitative modeling of racetrack memory, a tradeoff among area, performance, and power. In Proc. the 20th Asia and South Pacific Design Automation Conference (ASP-DAC), Jan. 2015, pp.100-105.","key":"1610_CR9","DOI":"10.1109\/ASPDAC.2015.7058988"},{"doi-asserted-by":"crossref","unstructured":"Parkin S S P, Hayashi M, Thomas L. Magnetic domain-wall racetrack memory. Science, 2008, 320(5873): 190\u2013194.","key":"1610_CR10","DOI":"10.1126\/science.1145799"},{"doi-asserted-by":"crossref","unstructured":"Sun Z, Wu W, Li H. Cross-layer racetrack memory design for ultra high density and low power consumption. In Proc. the 50th Annual Design Automation Conference (DAC), May 2013, Article No. 53.","key":"1610_CR11","DOI":"10.1145\/2463209.2488799"},{"doi-asserted-by":"crossref","unstructured":"Venkatesan R, Ramasubramanian S G, Venkataramani S, Roy K, Raghunathan A. Stag: Spintronic-tape architecture for GPGPU cache hierarchies. In Proc. the 41st Annual International Symposium on Computer Architecture (ISCA), Jun. 2014, pp.253-264.","key":"1610_CR12","DOI":"10.1145\/2678373.2665710"},{"doi-asserted-by":"crossref","unstructured":"Jing N, Shen Y, Lu Y, Ganapathy S, Mao Z, Guo M, Canal R, Liang X. An energy-efficient and scalable eDRAM-based register file architecture for GPGPU. In Proc. the 40th Annual International Symposium on Computer Architecture (ISCA), Jun. 2013, pp.344-355.","key":"1610_CR13","DOI":"10.1145\/2485922.2485952"},{"unstructured":"Wang S, Liang Y, Zhang C, Xie X, Sun G, Liu Y, Wang Y, Li X. Performance-centric register file design for GPUs using racetrack memory. In Proc. the 21st Asia and South Pacific Design Automation Conference (ASP-DAC), Jan. 2016.","key":"1610_CR14"},{"doi-asserted-by":"crossref","unstructured":"Kayiran O, Jog A, Kandemir M T, Das C R. Neither more nor less: Optimizing thread-level parallelism for GPGPUs. In Proc. the 22nd International Conference on Parallel Architectures and Compilation Techniques (PACT), Oct. 2013, pp.157-166.","key":"1610_CR15","DOI":"10.1109\/PACT.2013.6618806"},{"doi-asserted-by":"crossref","unstructured":"Jog A, Mishra A K, Xu C, Xie Y, Narayanan V, Iyer R, Das C R. Cache revive: Architecting volatile STT-RAM caches for enhanced performance in CMPs. In Proc. the 49th Annual Design Automation Conference (DAC), June 2012, pp.243-252.","key":"1610_CR16","DOI":"10.1145\/2228360.2228406"},{"doi-asserted-by":"crossref","unstructured":"Samavatian M H, Abbasitabar H, Arjomand M, Sarbazi-Azad H. An efficient STT-RAM last level cache architecture for GPUs. In Proc. the 51st Annual Design Automation Conference (DAC), May 2014, pp.197:1\u2013197:6.","key":"1610_CR17","DOI":"10.1145\/2593069.2593086"},{"doi-asserted-by":"crossref","unstructured":"Sun Z, Bi X, Li H H, Wong W F, Ong Z L, Zhu X, Wu W. Multi retention level STT-RAM cache designs with a dynamic refresh scheme. In Proc. the 44th Annual International Symposium on Microarchitecture (MICRO), Dec. 2011, pp.329-338.","key":"1610_CR18","DOI":"10.1145\/2155620.2155659"},{"doi-asserted-by":"crossref","unstructured":"Chen X, Sha E H M, Zhuge Q, Dai P, Jiang W. Optimizing data placement for reducing shift operations on domain wall memories. In Proc. the 52nd Annual Design Automation Conference (DAC), June 2015, pp.139:1\u2013139:6.","key":"1610_CR19","DOI":"10.1145\/2744769.2744883"},{"doi-asserted-by":"crossref","unstructured":"Venkatesan R, Kozhikkottu V, Augustine C, Raychowdhury A, Roy K, Raghunathan A. TapeCache: A high density, energy efficient cache based on domain wall memory. In Proc. the International Symposium on Low Power Electronics and Design (ISLPED), July 30-August 1, 2012, pp.185-190.","key":"1610_CR20","DOI":"10.1145\/2333660.2333707"},{"doi-asserted-by":"crossref","unstructured":"Jing N, Liu H, Lu Y Liang X. Compiler assisted dynamic register file in GPGPU. In Proc. the International Symposium on Low Power Electronics and Design (ISLPED), Sept. 2013, pp.3-8.","key":"1610_CR21","DOI":"10.1109\/ISLPED.2013.6629258"}],"container-title":["Journal of Computer Science and Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-016-1610-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11390-016-1610-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11390-016-1610-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T02:23:12Z","timestamp":1748744592000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11390-016-1610-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,1]]},"references-count":21,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2016,1]]}},"alternative-id":["1610"],"URL":"https:\/\/doi.org\/10.1007\/s11390-016-1610-1","relation":{},"ISSN":["1000-9000","1860-4749"],"issn-type":[{"type":"print","value":"1000-9000"},{"type":"electronic","value":"1860-4749"}],"subject":[],"published":{"date-parts":[[2016,1]]}}}