{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T07:20:55Z","timestamp":1729668055973,"version":"3.28.0"},"reference-count":20,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,5]]},"DOI":"10.1109\/iscas.2013.6571993","type":"proceedings-article","created":{"date-parts":[[2013,8,14]],"date-time":"2013-08-14T11:40:23Z","timestamp":1376480423000},"page":"901-904","source":"Crossref","is-referenced-by-count":0,"title":["DRAM access reduction in GPUs by thread-block scheduling for overlapped data reuse"],"prefix":"10.1109","author":[{"family":"Seungyeol Lee","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"family":"Wonyong Sung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2006.876103"},{"journal-title":"Efficient Matrix Multiplication Using Cache Conscious Data Layouts","year":"2001","author":"park","key":"17"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1145\/1356058.1356084"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1145\/1345206.1345220"},{"journal-title":"Programming Massively Parallel Processors A Hands-on Approach","year":"2010","author":"kirk","key":"13"},{"key":"14","article-title":"Program optimization study on a 128-core GPU","author":"ryoo","year":"2007","journal-title":"First Workshop on General Purpose Processing on Graphics Processing Units"},{"journal-title":"NVIDIA CUDA C Programming Guide","year":"2012","key":"11"},{"key":"12","first-page":"1","article-title":"Dymaxion: Optimizing memory access patterns for heterogeneous systems","author":"che","year":"2011","journal-title":"Proc High Performance Computing Networking Storage and Analysis (SC)"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1145\/29873.29875"},{"year":"0","key":"20"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1145\/502874.502897"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815998"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1145\/237090.237151"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1145\/76263.76337"},{"key":"6","doi-asserted-by":"crossref","first-page":"587","DOI":"10.1016\/0743-7315(88)90014-7","article-title":"Strategies for cache and local memory management by global program transformation","volume":"5","author":"gannon","year":"1988","journal-title":"Journal of Parallel and Distributed Computing"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.1999.807510"},{"key":"4","article-title":"Optimizing supercompilers for supercomputers","author":"wolfe","year":"1989","journal-title":"Research Monographs in Parallel and Distributed Computing"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1145\/301618.301668"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1145\/113445.113449"}],"event":{"name":"2013 IEEE International Symposium on Circuits and Systems (ISCAS)","start":{"date-parts":[[2013,5,19]]},"location":"Beijing","end":{"date-parts":[[2013,5,23]]}},"container-title":["2013 IEEE International Symposium on Circuits and Systems (ISCAS2013)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6560459\/6571764\/06571993.pdf?arnumber=6571993","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,14]],"date-time":"2020-10-14T11:59:23Z","timestamp":1602676763000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/6571993"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,5]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/iscas.2013.6571993","relation":{},"subject":[],"published":{"date-parts":[[2013,5]]}}}