{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,4,6]],"date-time":"2022-04-06T03:27:30Z","timestamp":1649215650945},"reference-count":27,"publisher":"Elsevier BV","issue":"2","license":[{"start":{"date-parts":[[1997,10,1]],"date-time":"1997-10-01T00:00:00Z","timestamp":875664000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Microprocessors and Microsystems"],"published-print":{"date-parts":[[1997,10]]},"DOI":"10.1016\/s0141-9331(97)00028-8","type":"journal-article","created":{"date-parts":[[2002,7,25]],"date-time":"2002-07-25T11:25:30Z","timestamp":1027596330000},"page":"121-130","source":"Crossref","is-referenced-by-count":2,"title":["Reducing memory penalty by a programmable prefetch engine for on-chip caches"],"prefix":"10.1016","volume":"21","author":[{"given":"Tien-Fu","family":"Chen","sequence":"first","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/S0141-9331(97)00028-8_BIB1","series-title":"Proc. IBM Center of Advanced Study Conference","article-title":"A compiler framework for restructuring data declarations to enhance cache and TLB effectiveness","author":"Bacon","year":"1994"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB2","doi-asserted-by":"crossref","DOI":"10.21236\/ADA281502","article-title":"A preliminary evaluation of cache-miss-initiated prefetching techniques in scalable multiprocessors","author":"Bianchini","year":"1994"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB3","article-title":"Data Prefetching for High-Performance Processors","author":"Chen","year":"1993"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB4","doi-asserted-by":"crossref","first-page":"609","DOI":"10.1109\/12.381947","article-title":"Effective hardware-based data prefetching for highperformance processors","volume":"44","author":"Chen","year":"1995","journal-title":"IEEE Trans. on Computers"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB5","series-title":"Proc. 25th Int. Symp. on Microarchitecture","article-title":"An efficient architecture for loop based data preloading","author":"Chen","year":"1992"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB6","series-title":"Proc. 24th Int. Symp. on Microarchitecture","article-title":"Data access microarchitectures for superscalar processors with compiler-assisted data prefetching","author":"Chen","year":"1991"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB7","series-title":"Proc. Supercomputing '94","first-page":"488","article-title":"Sunder: A programmable hardware prefetch architecture for numerical loops","author":"Chiueh","year":"1994"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB8","series-title":"Proc. Int. Conf. Parallel Processing","first-page":"I 56","article-title":"Fixed and adaptive sequential prefetching in shared-memory multiprocessors","author":"Dahlgren","year":"1993"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB9","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1109\/71.494633","article-title":"Evaluation of hardware-based stride and sequential prefetching in shared-memory multiprocessors","volume":"7","author":"Dahlgren","year":"1996","journal-title":"IEEE Trans. Parallel and Distributed Systems"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB10","series-title":"Proc. 1st Int. Symp. on High-Performance Computer Architecture","first-page":"68","article-title":"Effectiveness of hardware-based stride and sequential prefetching in shared-memory multiprocessors","author":"Dahlgren","year":"1995"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB11","series-title":"Proc. 18th Int. Symp. on Computer Architecture","first-page":"54","article-title":"Data prefetching in multiprocessor vector cache memories","author":"Fu","year":"1991"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB12","series-title":"Proc. 25th Int. Symp. on Microarchitecture","first-page":"102","article-title":"Stride directed prefetching in scalar processors","author":"Fu","year":"1992"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB13","series-title":"Proc. 4th ACM Int. Conf. on Supercomputing","first-page":"354","article-title":"Compiler-directed data prefetching in multiprocessors with memory hierarchies","author":"Gornish","year":"1990"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB14","series-title":"Proc. 18th Int. Symp. on Computer Architecture","first-page":"254","article-title":"Comparative emulation of latency reducing and tolerating techniques","author":"Gupta","year":"1991"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB15","series-title":"Proc. 17th Int. Symp. on Computer Architecture","first-page":"364","article-title":"Improving direct-mapped cache performance by the addition of a small fully-associative cache and prefetch buffers","author":"Jouppi","year":"1990"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB16","series-title":"Proc. 18th Int. Symp. on Computer Architecture","first-page":"43","article-title":"An architecture for software-controlled data prefetching","author":"Klaiber","year":"1991"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB17","series-title":"Proc. 20th Int. Symp. on Computer Architecture","first-page":"213","article-title":"The cedar system and an initial performance study","author":"Kuck","year":"1993"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB18","series-title":"Proc. ACM SIGPLAN 88 Conference on Programming Language Design and Implementation","first-page":"318","article-title":"Software pipelining: An effective scheduling technique for VLIW machines","author":"Lam","year":"1988"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB19","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1016\/0743-7315(91)90014-Z","article-title":"Tolerating latency through software-controlled prefetching in shared-memory multiprocessors","volume":"12","author":"Mowny","year":"1991","journal-title":"J. Parallel and Distributed Computing"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB20","series-title":"Proc. 5th Int. Conf. on Architectural Support for Programming Languages and Operating Systems","first-page":"62","article-title":"Design and evaluation of a compiler algorithm for prefetching","author":"Mowry","year":"1992"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB21","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1109\/4.68123","article-title":"An area model for on-chip memories and its application","volume":"26","author":"Mulder","year":"1991","journal-title":"IEEE J. Solid-State Circuits"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB22","article-title":"Software methods for improvement of cache performance on supercomputer applications","author":"Ponterfield","year":"1989"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB23","first-page":"112","article-title":"Decoupled access\/execute computer architectures","author":"Smith","year":"1982"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB24","series-title":"Proc. 20th Int. Symp. on Computer Architecture","article-title":"Limitation of cache prefetching on a bus-based multiprocessor","author":"Tuilsen","year":"1993"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB25","doi-asserted-by":"crossref","first-page":"1147","DOI":"10.1109\/4.148323","article-title":"An analytical access time model for on-chip cache memories","volume":"27","author":"Wada","year":"1992","journal-title":"IEEE J. Solid-State Circuits"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB26","article-title":"An enhanced access and cycle time model for on-chip caches","author":"Wilton","year":"1994"},{"key":"10.1016\/S0141-9331(97)00028-8_BIB27","series-title":"Proc. ACM SIGPLAN 91 Conference on Programming Language Design and Implementation","first-page":"30","article-title":"A data locality optimizing algorithm","author":"Wolf","year":"1991"}],"container-title":["Microprocessors and Microsystems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0141933197000288?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0141933197000288?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,4,27]],"date-time":"2019-04-27T02:51:50Z","timestamp":1556333510000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0141933197000288"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1997,10]]},"references-count":27,"journal-issue":{"issue":"2","published-print":{"date-parts":[[1997,10]]}},"alternative-id":["S0141933197000288"],"URL":"https:\/\/doi.org\/10.1016\/s0141-9331(97)00028-8","relation":{},"ISSN":["0141-9331"],"issn-type":[{"value":"0141-9331","type":"print"}],"subject":[],"published":{"date-parts":[[1997,10]]}}}