{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T14:07:53Z","timestamp":1773842873661,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":64,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642244025","type":"print"},{"value":"9783642244032","type":"electronic"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2011]]},"DOI":"10.1007\/978-3-642-24403-2_23","type":"book-chapter","created":{"date-parts":[[2011,10,17]],"date-time":"2011-10-17T11:56:27Z","timestamp":1318852587000},"page":"293-312","source":"Crossref","is-referenced-by-count":7,"title":["Hardware Performance Monitoring for the Rest of Us: A Position and Survey"],"prefix":"10.1007","author":[{"given":"Tipp","family":"Moseley","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Neil","family":"Vachharajani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"William","family":"Jalby","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"23_CR1","volume-title":"IEEE\/ACM International Symposium on Code Generation and Optimization","author":"L. Liu","year":"2009","unstructured":"Liu, L., Rus, S.: Perflint: A context sensitive performance advisor for c++ programs. In: IEEE\/ACM International Symposium on Code Generation and Optimization. IEEE Computer Society, Los Alamitos (2009)"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Tallent, N.R., Mellor-Crummey, J.M., Fagan, M.W.: Binary analysis for measurement and attributionof program performance. In: PLDI (2009)","DOI":"10.1145\/1542476.1542526"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Moseley, T., Connors, D.A., Grunwald, D., Peri, R.: Identifying potential parallelism via loopcentricprofiling. In: Proceedings of the 2007 International Conference on Computing Frontiers (May 2007)","DOI":"10.1145\/1242531.1242554"},{"key":"23_CR4","doi-asserted-by":"crossref","unstructured":"Price, G.D., Giacomoni, J., Vachharajani, M.: Visualizing potential parallelism in sequential programs. In: PACT (2008)","DOI":"10.1145\/1454115.1454129"},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Zilles, C.B.: Benchmark health considered harmful. SIGARCH Computer Architecture News (2001)","DOI":"10.1145\/503205.503206"},{"key":"23_CR6","volume-title":"CGO 2009: Proceedings of the International Symposium on Code Generation and Optimization","author":"T. Moseley","year":"2009","unstructured":"Moseley, T., Grunwald, D., Peri, R.V.: Optiscope: Performance accountability for optimizing compilers. In: CGO 2009: Proceedings of the International Symposium on Code Generation and Optimization. IEEE Computer Society, Seattle (2009)"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Mytkowicz, T., Diwan, A., Hauswirth, M., Sweeney, P.F.: Producing wrong data without doing anything obviously wrong! In: ASPLOS (2009)","DOI":"10.1145\/1508244.1508275"},{"key":"23_CR8","doi-asserted-by":"crossref","unstructured":"Moseley, T., Shye, A., Reddi, V.J., Iyer, M., Fay, D., Hodgdon, D., Kihm, J.L., Settle, A., Grunwald, D., Connors, D.A.: Dynamic run-time architecture techniques for enabling continuous optimization. In: Proceedings of the 2005 International Conference on Computing Frontiers (May 2005)","DOI":"10.1145\/1062261.1062296"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Knights, D., Mytkowicz, T., Sweeney, P.F., Mozer, M.C., Diwan, A.: Blind optimization for exploiting hardware features. In: Conference on Compiler Construction (2009)","DOI":"10.1007\/978-3-642-00722-4_18"},{"key":"23_CR10","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1145\/1152154.1152182","volume-title":"PACT 2006: Proceedings of the 15th International Conference on Parallel Architectures and Compilation Techniques","author":"Z. Pan","year":"2006","unstructured":"Pan, Z., Eigenmann, R.: Fast, automatic, procedure-level performance tuning. In: PACT 2006: Proceedings of the 15th International Conference on Parallel Architectures and Compilation Techniques, pp. 173\u2013181. ACM Press, New York (2006)"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Whaley, C.R., Dongarra, J.J.: Automatically tuned linear algebra software. In: Supercomputing 1998: Proceedings of the 1998 ACM\/IEEE Conference on Supercomputing, CDROM (1998)","DOI":"10.1109\/SC.1998.10004"},{"key":"23_CR12","unstructured":"Callister, J.: Confessions of a performance monitor hardware designer. In: Workshop on Hardware Performance Monitor Design and Functionality Colocated with HPCA (2005)"},{"key":"23_CR13","unstructured":"Amd lightweight profiling specification, http:\/\/developer.amd.com\/cpu\/LWP\/Pages\/default.aspx"},{"key":"23_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/268998.266637","volume-title":"SOSP 1997: Proceedings of the Sixteenth ACM Symposium on Operating Systems Principles","author":"J.M. Anderson","year":"1997","unstructured":"Anderson, J.M., Berc, L.M., Dean, J., Ghemawat, S., Henzinger, M.R., Leung, S.-T.A., Sites, R.L., Vandevoorde, M.T., Waldspurger, C.A., Weihl, W.E.: Continuous profiling: where have all the cycles gone? In: SOSP 1997: Proceedings of the Sixteenth ACM Symposium on Operating Systems Principles, pp. 1\u201314. ACM Press, New York (1997)"},{"key":"23_CR15","unstructured":"OProfile, http:\/\/oprofile.sourceforge.net"},{"key":"23_CR16","unstructured":"Intel platform modeling tool with machine learning, http:\/\/software.intel.com\/en-us\/articles\/intel-platform-modeling-with-machine-learning\/"},{"key":"23_CR17","unstructured":"Intel64 and IA-32 Architectures Software Developer\u2019s Manual - Volume 3B, Intel Corporation"},{"key":"23_CR18","unstructured":"Dean, J., Hicks, J.E., Waldspurger, C.A., Weihl, W.E., Chrysos, G.Z.: Profileme: Hardware support for instruction-level profiling on out-of-order processors. In: International Symposium on Microarchitecture, pp. 292\u2013302 (1997), citeseer.ist.psu.edu\/dean97profileme.html"},{"key":"23_CR19","unstructured":"Drongowski, P.: Instruction-based sampling: A new performance analysis technique for amd family 10h processors (2007)"},{"key":"23_CR20","unstructured":"Intel Corporation, Intel Itanium 2 processor reference manual: For software development and optimization (May 2004)"},{"key":"23_CR21","unstructured":"Workshop on hardware performance monitor design and functionality colocated with hpca (2005), http:\/\/lacsi.rice.edu\/workshops\/hpca11"},{"key":"23_CR22","unstructured":"v2 of comments on performance counters for linux, pcl (2009), http:\/\/lkml.org\/lkml\/2009\/6\/16\/432"},{"key":"23_CR23","unstructured":"Hunter, H.C., Nair, R.: Refining performance monitor design. In: Proceedings of the 2004 Workshop on Complexity Effective Design, WCED (2004)"},{"key":"23_CR24","doi-asserted-by":"crossref","unstructured":"Cavazos, J., Dubach, C., Agakov, F., Bonilla, E., O\u2019Boyle, M.F., Fursin, G., Temam, O.: Automatic performance model construction for the fast software exploration of new hardware designs. In: International Conference on Compilers, Architecture, And Synthesis For Embedded Systems (CASES 2006) (October 2006)","DOI":"10.1145\/1176760.1176765"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Sprunt, B.: Performance monitoring hardware will always be a low priority, second class feature in processor designs until. In: Workshop on Hardware Performance Monitor Design and Functionality Colocated with HPCA (2005)","DOI":"10.1201\/9781420037425.ch11"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Moseley, T., Kihm, J.L., Connors, D.A., Grunwald, D.: Methods for modeling resource contention on simultaneous multithreading processors. In: Proceedings of the 2005 International Conference on Computer Design (ICCD) (October 2005)","DOI":"10.1109\/ICCD.2005.74"},{"key":"23_CR27","doi-asserted-by":"crossref","unstructured":"Ould-Ahmed-Vall, E., Woodlee, J., Yount, C., Doshi, K.A., Abraham, S.: Using model trees for computer architecture performance analysis of software applications. In: ISPASS (2007)","DOI":"10.1109\/ISPASS.2007.363742"},{"key":"23_CR28","unstructured":"Dai, X., Zhai, A., Hsu, W.-C., Yew, P.-C.: A general compiler framework for speculative optimizations using data speculative code motion. In: CGO 2005: Proceedings of the International Symposium on Code Generation and Optimization (2005)"},{"key":"23_CR29","unstructured":"Canturk Isci, M.M., Contreras, G.: Hardware performance counters for detailed runtime power and thermal estimations: Experiences and proposals. In: Workshop on Hardware Performance Monitor Design and Functionality Colocated with HPCA (2005)"},{"key":"23_CR30","unstructured":"Moseley, T.: Adaptive thread scheduling for simultaneous multithreading processors, Boulder, CO. (March 2006)"},{"key":"23_CR31","doi-asserted-by":"crossref","unstructured":"Shye, A., Iyer, M., Moseley, T., Hodgdon, D., Fay, D., Reddi, V.J., Connors, D.A.: Analyis of path profiling information generated with performance monitoring hardware. In: INTERACT 2005: Proceedings of the 9th Annual Workshop on Interaction between Compilers and Computer Architectures, pp. 34\u201343. IEEE Computer Society, Washington, DC (2005)","DOI":"10.1109\/INTERACT.2005.3"},{"key":"23_CR32","doi-asserted-by":"crossref","unstructured":"Shye, A., \u00d6zisikyilmaz, B., Mallik, A., Memik, G., Dinda, P.A., Dick, R.P., Choudhary, A.N.: Learning and leveraging the relationship between architecture-level measurements and individual user satisfaction. In: ISCA (2008)","DOI":"10.1109\/ISCA.2008.29"},{"key":"23_CR33","unstructured":"Tikir, M.M., Buck, B.R., Hollingsworth, J.K.: What we need to be able to count to tune programs. In: Workshop on Hardware Performance Monitor Design and Functionality Colocated with HPCA (2005)"},{"key":"23_CR34","unstructured":"Tuduce, I., Gross, T.: Efficient collection of information on the locality of accesses. In: Workshop on Hardware Performance Monitor Design and Functionality Colocated with HPCA (2005)"},{"key":"23_CR35","unstructured":"Brantley, B.: The NUMA challenge. In: Workshop on Hardware Performance Monitor Design and Functionality Colocated with HPCA (2005)"},{"key":"23_CR36","unstructured":"Rishi, A., Masamitsu, J.A.: Us patent no. 5953530. method and apparatus for run-time memory access checking and memory leak detection"},{"key":"23_CR37","doi-asserted-by":"crossref","unstructured":"Conte, T.M., Patel, B.A., Menezes, K.N., Cox, J.S.: Hardware-based profiling: an effective Technique for profile-driven optimization. Int. J. Parallel Programming (1996)","DOI":"10.1007\/BF03356747"},{"key":"23_CR38","doi-asserted-by":"crossref","unstructured":"Fields, B.A., Bodik, R., Hill, M.D., Newburn, C.J.: Interaction cost and shotgun profiling. ACM Trans. Architecture Code Optimization (2004)","DOI":"10.1145\/1022969.1022971"},{"key":"23_CR39","doi-asserted-by":"crossref","unstructured":"Zilles, C.B., Sohi, G.S.: A programmable co-processor for profiling. In: HPCA (2001)","DOI":"10.1109\/HPCA.2001.903267"},{"key":"23_CR40","doi-asserted-by":"crossref","unstructured":"Weaver, V.M., McKee, S.A.: Can hardware performance counters be trusted? In: IISWC (2008)","DOI":"10.1109\/IISWC.2008.4636099"},{"key":"23_CR41","unstructured":"Mucci, P., Smeds, N., Ekman, P.: Performance monitoring with papi using the performance Application programming interface. Dr. Dobb\u2019s (2005), http:\/\/www.ddj.com\/developmenttools\/184406109"},{"key":"23_CR42","unstructured":"Mucci, P.: Towards a flexible and realistic hardware performance monitor infrastructure. In: Workshop on Hardware Performance Monitor Design and Functionality Colocated with HPCA (2005)"},{"key":"23_CR43","doi-asserted-by":"crossref","unstructured":"Sprunt, B.: Managing the complexity of performance monitoring hardware: The brink andabyss Approach. Int. J. High Perform. Comput. Appl. (2006)","DOI":"10.1177\/1094342006064569"},{"key":"23_CR44","unstructured":"Daniel Molka, R.S., Hackenberg, D., Mller, M.S.: Memory performance and cache coherency effects on an intel nehalem multiprocessor system"},{"key":"23_CR45","unstructured":"Fowler, R.: Performance hardware if i ran the world. In: Workshop on Hardware Performance Monitor Design and Functionality Colocated with HPCA (2005)"},{"key":"23_CR46","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1007\/978-3-540-77560-7_20","volume-title":"HiPEAC","author":"R. Levin","year":"2008","unstructured":"Levin, R., Newman, I., Haber, G.: Complementing missing and inaccurate profiling using a minimum cost circulation algorithm. In: Stenstr\u00f6m, P., Dubois, M., Katevenis, M., Gupta, R., Ungerer, T. (eds.) HiPEAC 2007. LNCS, vol.\u00a04917, pp. 291\u2013304. Springer, Heidelberg (2008)"},{"key":"23_CR47","doi-asserted-by":"crossref","unstructured":"Todd Mytkowicz, D.C., Diwan, A.: Inferred call path profiling. In: OOPSLA (2009)","DOI":"10.1145\/1640089.1640102"},{"key":"23_CR48","unstructured":"AMD Code Analyst, http:\/\/developer.amd.com\/cpu\/CodeAnalyst\/Pages\/default.aspx"},{"key":"23_CR49","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1145\/1065010.1065034","volume-title":"PLDI 2005: Proceedings of the 2005 ACM SIGPLAN Conference on Programming Language Design and Implementation","author":"C.-K. Luk","year":"2005","unstructured":"Luk, C.-K., Cohn, R., Muth, R., Patil, H., Klauser, A., Lowney, G., Wallace, S., Reddi, V.J., Hazelwood, K.: Pin: building customized program analysis tools with dynamic instrumentation. In: PLDI 2005: Proceedings of the 2005 ACM SIGPLAN Conference on Programming Language Design and Implementation, pp. 190\u2013200. ACM Press, New York (2005)"},{"key":"23_CR50","doi-asserted-by":"crossref","unstructured":"Nethercote, N., Seward, J.: Valgrind: A framework for heavyweight dynamic binary instrumentation. In: Proceedings of ACM SIGPLAN 2007 Conference on Programming Language Design and Implementation (PLDI 2007), San Diego, California, USA (2007)","DOI":"10.1145\/1250734.1250746"},{"key":"23_CR51","unstructured":"Dyninst: An application program interface (api) for runtime code generation, http:\/\/www.dyninst.org"},{"key":"23_CR52","unstructured":"Bruening, D.L.: Efficient, transparent, and comprehensive runtime code manipulation. Ph.D. dissertation, Cambridge, MA, USA (2004)"},{"key":"23_CR53","doi-asserted-by":"crossref","unstructured":"Magnusson, P.S., Christensson, M., Eskilson, J., Forsgren, D., Hllberg, G., Hgberg, J., Larsson, F., Moestedt, A., Werner, B.: Simics: A full system simulation platform. Computer (2002)","DOI":"10.1109\/2.982916"},{"key":"23_CR54","unstructured":"Valgrind\u2019s tools suite, http:\/\/valgrind.org\/info\/tools.html"},{"issue":"3","key":"23_CR55","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1109\/MM.2007.56","volume":"27","author":"K. Hoste","year":"2007","unstructured":"Hoste, K., Eeckhout, L.: Microarchitecture-independent workload characterization. IEEE Micro\u00a027(3), 63\u201372 (2007)","journal-title":"IEEE Micro"},{"key":"23_CR56","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wang, Z., Gloy, N.C., Chen, J.B., Smith, M.D.: System support for automated profiling and optimization. In: SOSP (1997)","DOI":"10.1145\/268998.266640"},{"key":"23_CR57","unstructured":"Hirzel, M., Chilimbi, T.: Bursty tracing: A framework for low-overhead temporal profiling. In: 4th ACM Workshop on Feedback-Directed and Dynamic Optimization, FDDO-4 (2001), citeseer.ist.psu.edu\/hirzel01bursty.html"},{"key":"23_CR58","doi-asserted-by":"crossref","unstructured":"Arnold, M., Ryder, B.G.: A framework for reducing the cost of instrumented code. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 168\u2013179 (2001), citeseer.ist.psu.edu\/arnold01framework.html","DOI":"10.1145\/381694.378832"},{"key":"23_CR59","volume-title":"CGO 2007: Proceedings of the International Symposium on Code Generation and Optimization","author":"T. Moseley","year":"2007","unstructured":"Moseley, T., Shye, A., Reddi, V.J., Grunwald, D., Peri, R.V.: Shadow profiling: Hiding instrumentation costs with parallelism. In: CGO 2007: Proceedings of the International Symposium on Code Generation and Optimization. IEEE Computer Society, San Jose (2007)"},{"key":"23_CR60","doi-asserted-by":"crossref","unstructured":"Hoste, K., Phansalkar, A., Eeckhout, L., Georges, A., John, L.K., Bosschere, K.D.: Performance prediction based on inherent program similarity. In: PACT (2006)","DOI":"10.1145\/1152154.1152174"},{"key":"23_CR61","doi-asserted-by":"crossref","unstructured":"Shaham, R., Kolodner, E.K., Sagiv, M.: Heap profiling for space-efficient java. In: PLDI 2001: Proceedings of the ACM SIGPLAN 2001 Conference on Programming Language Design and Implementation (2001)","DOI":"10.1145\/378795.378820"},{"key":"23_CR62","unstructured":"Djoudi, L., Barthou, D., Carribault, P., Lemuet, C., Acquaviva, J.-T., Jalby, W.: Exploring application performance: a new tool for a static\/dynamic approach. In: Los Alamos Computer Science Institute Symp., Santa Fe, NM (October 2005)"},{"key":"23_CR63","unstructured":"Iyer, M., Ashok, C., Stone, J., Vachharajani, N., Connors, D.A., Vachharajani, M.: Finding parallelism for future epic machines. In: Proceedings of the Fourth Workshop on Explicitly Parallel Instruction Computer Architectures and Compiler Technology, EPIC (2005)"},{"issue":"2-3","key":"23_CR64","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1002\/cpe.774","volume":"16","author":"G. Fursin","year":"2004","unstructured":"Fursin, G., O\u2019Boyle, M., Temam, O., Watts, G.: Fast and accurate method for determining a lower bound on execution time. Concurrency: Practice and Experience\u00a016(2-3), 271\u2013292 (2004)","journal-title":"Concurrency: Practice and Experience"}],"container-title":["Lecture Notes in Computer Science","Network and Parallel Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-24403-2_23.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,24]],"date-time":"2020-11-24T03:15:56Z","timestamp":1606187756000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-24403-2_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011]]},"ISBN":["9783642244025","9783642244032"],"references-count":64,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-24403-2_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2011]]}}}