{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,21]],"date-time":"2025-05-21T06:12:15Z","timestamp":1747807935120,"version":"3.28.0"},"reference-count":26,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011,4]]},"DOI":"10.1109\/ispass.2011.5762713","type":"proceedings-article","created":{"date-parts":[[2011,5,9]],"date-time":"2011-05-09T20:41:43Z","timestamp":1304973703000},"page":"32-43","source":"Crossref","is-referenced-by-count":16,"title":["Evaluation and optimization of multicore performance bottlenecks in supercomputing applications"],"prefix":"10.1109","author":[{"given":"Jeff","family":"Diamond","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin","family":"Burtscher","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"John D.","family":"McCalpin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Byoung-Do","family":"Kim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stephen W.","family":"Keckler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"James C.","family":"Browne","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Description of the NCAR Community Climate Model (CCM2)","author":"hack","year":"1993","journal-title":"Technical Report TN-382"},{"journal-title":"Intel 64 and IA-32 Software Developer's Manual Volume 3B System Programming Guide Part 2","year":"2011","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1739025.1739031"},{"journal-title":"Longhorn User's Guide","year":"0","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/1065010.1065034"},{"journal-title":"Stream Sustainable Memory Bandwidth in High Performance Computers","year":"1991","author":"mccalpin","key":"ref15"},{"key":"ref16","first-page":"19","article-title":"Memory Bandwidth and Machine Balance in Current High Performance Computers","author":"mccalpin","year":"1995","journal-title":"IEEE Computer Society Technical Committee on Computer Architecture (TCCA) Newslet-ter"},{"journal-title":"MPI Profiler","year":"0","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2008.4536408"},{"journal-title":"NSF 0605 The High-Performance Computing Challenge Benchmarks version 2 0","year":"2005","key":"ref19"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.41"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/197405.197406"},{"key":"ref6","article-title":"Multicore Optimization for Ranger","author":"diamond","year":"2009","journal-title":"2009 TeraGrid Conference"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2008.5222004"},{"key":"ref8","first-page":"3","article-title":"The Impact of Multicore on Computational Science Software","volume":"3","author":"dongarra","year":"2007","journal-title":"CTWatch Quar-terly"},{"key":"ref7","article-title":"Making Sense of Performance Counter Measurements on Supercomputing Applications","author":"diamond","year":"2010","journal-title":"Technical Report TR-10-25"},{"journal-title":"BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h Processors Rev 3 48","year":"2010","key":"ref2"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/800230.806987"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"685","DOI":"10.1002\/cpe.1553","article-title":"HPCTOOLKIT: Tools for Performance Analysis of Optimized Parallel Programs","volume":"22","author":"adhianto","year":"2010","journal-title":"Concurrency and Computation Practice and Experience"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"762","DOI":"10.1016\/j.jpdc.2009.04.002","article-title":"Optimization of a Lattice Boltzmann Computation on State-of-the-art Multicore Platforms","volume":"69","author":"oliker","year":"2009","journal-title":"Journal of Parallel and Distributed Computing"},{"journal-title":"Linux Performance Counter Kernel API","year":"0","key":"ref22"},{"journal-title":"PAPI Performance Application Programming Interface","year":"0","key":"ref21"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1177\/1094342006064482"},{"journal-title":"Ranger User's Guide","year":"0","key":"ref23"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2008.12.006"}],"event":{"name":"Software (ISPASS)","start":{"date-parts":[[2011,4,10]]},"location":"Austin, TX, USA","end":{"date-parts":[[2011,4,12]]}},"container-title":["(IEEE ISPASS) IEEE INTERNATIONAL SYMPOSIUM ON PERFORMANCE ANALYSIS OF SYSTEMS AND SOFTWARE"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/5755445\/5762705\/05762713.pdf?arnumber=5762713","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,6]],"date-time":"2024-04-06T16:39:58Z","timestamp":1712421598000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/5762713\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,4]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/ispass.2011.5762713","relation":{},"subject":[],"published":{"date-parts":[[2011,4]]}}}