{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,22]],"date-time":"2025-02-22T05:35:01Z","timestamp":1740202501449,"version":"3.37.3"},"reference-count":57,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010,4]]},"DOI":"10.1109\/ipdps.2010.5470465","type":"proceedings-article","created":{"date-parts":[[2010,6,2]],"date-time":"2010-06-02T20:25:07Z","timestamp":1275510307000},"page":"1-12","source":"Crossref","is-referenced-by-count":6,"title":["Exploiting inter-thread temporal locality for chip multithreading"],"prefix":"10.1109","author":[{"family":"Jiayuan Meng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jeremy W","family":"Sheaffer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kevin","family":"Skadron","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"journal-title":"NVIDIA CUDA Compute Unified Device Architecture Programming Guide","article-title":"NVIDIA Corporation","year":"2007","key":"ref39"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2006.302743"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.1997.645803"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/1273440.1250683"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/1346281.1346311"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2005.35"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2009.5413143"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/329466.329484"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/SUPERC.1992.236705"},{"key":"ref34","first-page":"161","article-title":"Smart Memories: a modular reconfigurable architecture","author":"mai","year":"2000","journal-title":"Proceedings of 27th International Symposium on Computer Architecture (IEEE Cat No RS00201) ISCA"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/HPCSA.2002.1019154"},{"key":"ref27","first-page":"429","article-title":"Optimizing matrix operations on a parallel multiprocessor with a hierarchical memory system","author":"jalby","year":"1986","journal-title":"Proc Int Conf Parallel Processing"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/258915.258946"},{"journal-title":"NVIDIA Corporation","article-title":"NVIDIAs next generation CUDA compute architecture: Fermi","year":"2009","key":"ref2"},{"year":"0","key":"ref1","article-title":"LEON2 Processor"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2008.5222004"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2006.55"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2005.42"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.1145\/1128022.1128023","article-title":"Chip multiprocessing and the Cell Broadband Engine","author":"gschwind","year":"2006","journal-title":"CF '06"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/1088149.1088197"},{"key":"ref26","first-page":"305","article-title":"A comprehensive approach to dram power management","author":"hur","year":"2008","journal-title":"HPCA '08"},{"key":"ref25","first-page":"878","article-title":"Compact thermal modeling for temperature-aware design","author":"wei huang","year":"2004","journal-title":"Proceedings 41st Design Automation Conference 2004 DAC"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2002.995703"},{"key":"ref51","article-title":"Cacti 4.0. Technical Report HPL-2006&#x2013;86","author":"tarjan","year":"2006","journal-title":"HP Laboratories Palo Alto"},{"key":"ref57","article-title":"An adaptive OpenMP loop scheduler for hyperthreaded SMPs","author":"zhang","year":"0","journal-title":"PDCS'04 2004"},{"key":"ref56","first-page":"151","article-title":"Exploiting choice in resizable cache design to optimize deep-submicron processor energy-delay","author":"yang","year":"2002","journal-title":"HPCA'02"},{"year":"0","key":"ref55","article-title":"Inc. XILINX. Virtex-ii pro and virtex-ii pro x fpga user guide"},{"key":"ref54","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1109\/ISCA.1995.524546","article-title":"The SPLASH-2 programs: characterization and methodological considerations","author":"woo","year":"1995","journal-title":"Proceedings 22nd Annual International Symposium on Computer Architecture ISCA"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/LPE.2006.4271823"},{"key":"ref52","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1006\/jpdc.1995.1014","article-title":"Evaluating the performance of cache-affinity scheduling in shared-memory multiprocessors","volume":"24","author":"torrellas","year":"1995","journal-title":"J Parallel Distrib Comput"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IPPS.1995.395939"},{"key":"ref40","article-title":"Thread-sensitive scheduling for smt processors","author":"parekh","year":"2000","journal-title":"Technical Report"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2005.27"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2009.4798258"},{"key":"ref13","article-title":"A performance study of general purpose applications on graphisc processors using CUDA","author":"che","year":"2008","journal-title":"JPDC '08"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1145\/1248377.1248396","article-title":"Scheduling threads for constructive cache sharing on CMPs","author":"chen","year":"2007","journal-title":"SPAA'07"},{"year":"0","key":"ref15","article-title":"Intel Corporation. Intel threading building blocks"},{"year":"0","key":"ref16","article-title":"Intel Corporation. Pircture the future now: Intel AVX"},{"journal-title":"GeForce GTX 280 Specifications","article-title":"NVIDIA Corporation","year":"2008","key":"ref17"},{"year":"1997","author":"dagum","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1048935.1050187"},{"key":"ref4","article-title":"The landscape of parallel computing research: A view from Berkeley","author":"asanovic","year":"2006","journal-title":"Technical Report UCB\/EECS-2006&#x2013;183 EECS Department University of California Berkeley"},{"key":"ref3","first-page":"817","article-title":"Cactus grid computing: Review of current development","author":"allen","year":"2001","journal-title":"Euro-Par '01"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/215399.215403"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2006.82"},{"journal-title":"OpenMP Application Program Interface","article-title":"OpenMP Architecture Review Board","year":"2008","key":"ref8"},{"key":"ref49","first-page":"214","article-title":"Affinity scheduling of unbalanced workloads","author":"subramaniam","year":"1994","journal-title":"SC'94"},{"article-title":"Executing multithreaded programs efficiently","year":"1995","author":"blumofe","key":"ref7"},{"key":"ref9","first-page":"83","article-title":"Wattch: a framework for architectural-level power analysis and optimizations","author":"brooks","year":"2000","journal-title":"Proceedings of 27th International Symposium on Computer Architecture (IEEE Cat No RS00201) ISCA"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/1360612.1360617"},{"key":"ref45","first-page":"179","article-title":"Tiling of iteration spaces for multicomputers","author":"ramanujam","year":"1990","journal-title":"Proc 1990 Int Conf Parallel Processing"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/378993.379244"},{"key":"ref47","doi-asserted-by":"crossref","first-page":"221","DOI":"10.3233\/EMC-2006-00027","article-title":"A dynamically reconfigurable cache for multithreaded processors","volume":"2","author":"settle","year":"2006","journal-title":"J Embedded Comput"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/514191.514227"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/237090.237151"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2007.4378785"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2006.10.023"}],"event":{"name":"Distributed Processing (IPDPS)","start":{"date-parts":[[2010,4,19]]},"location":"Atlanta, GA","end":{"date-parts":[[2010,4,23]]}},"container-title":["2010 IEEE International Symposium on Parallel &amp; Distributed Processing (IPDPS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5465899\/5470342\/05470465.pdf?arnumber=5470465","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T15:48:42Z","timestamp":1740152922000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5470465\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,4]]},"references-count":57,"URL":"https:\/\/doi.org\/10.1109\/ipdps.2010.5470465","relation":{},"subject":[],"published":{"date-parts":[[2010,4]]}}}