{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T16:52:24Z","timestamp":1776963144020,"version":"3.51.4"},"reference-count":15,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008,11]]},"DOI":"10.1109\/sc.2008.5222004","type":"proceedings-article","created":{"date-parts":[[2009,8,28]],"date-time":"2009-08-28T14:48:47Z","timestamp":1251470927000},"page":"1-12","source":"Crossref","is-referenced-by-count":251,"title":["Stencil computation optimization and auto-tuning on state-of-the-art multicore architectures"],"prefix":"10.1109","author":[{"given":"K.","family":"Datta","sequence":"first","affiliation":[]},{"given":"M.","family":"Murphy","sequence":"additional","affiliation":[]},{"given":"V.","family":"Volkov","sequence":"additional","affiliation":[]},{"given":"S.","family":"Williams","sequence":"additional","affiliation":[]},{"given":"J.","family":"Carter","sequence":"additional","affiliation":[]},{"given":"L.","family":"Oliker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Patterson","sequence":"additional","affiliation":[]},{"given":"J.","family":"Shalf","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yelick","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"15","doi-asserted-by":"publisher","DOI":"10.1145\/1362622.1362674"},{"key":"13","doi-asserted-by":"publisher","DOI":"10.1145\/1111583.1111589"},{"key":"14","author":"mccalpin","year":"0","journal-title":"Stream Sustainable Memory Bandwidth in High Performance Computers"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-8191(00)00087-9"},{"key":"12","year":"0"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1177\/1094342004041295"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1016\/0021-9991(84)90073-1"},{"key":"1","year":"0"},{"key":"10","year":"0","journal-title":"NVIDIA CUDA Programming Guide 1 1"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2008.4536295"},{"key":"6","doi-asserted-by":"publisher","DOI":"10.1145\/1178597.1178605"},{"key":"5","doi-asserted-by":"crossref","DOI":"10.1145\/379539.379586","article-title":"blocking and array contraction across arbitrarily nested loops using affine partitioning","author":"lim","year":"2001","journal-title":"Proc Sixth ACM SIGPLAN Symp Principles and Practice of Parallel Programming"},{"key":"4","year":"0"},{"key":"9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1128022.1128023","article-title":"chip multiprocessing and the cell broadband engine","author":"gschwind","year":"2006","journal-title":"CF '06 Proceedings of the 3rd Conference on Computing Frontiers"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1145\/1128022.1128027"}],"event":{"name":"2008 SC - International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Austin, TX","start":{"date-parts":[[2008,11,15]]},"end":{"date-parts":[[2008,11,21]]}},"container-title":["2008 SC - International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5206875\/5213127\/05222004.pdf?arnumber=5222004","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,11]],"date-time":"2025-02-11T22:21:25Z","timestamp":1739312485000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5222004\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,11]]},"references-count":15,"URL":"https:\/\/doi.org\/10.1109\/sc.2008.5222004","relation":{},"subject":[],"published":{"date-parts":[[2008,11]]}}}