{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T02:51:20Z","timestamp":1725936680894},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319729701"},{"type":"electronic","value":"9783319729718"}],"license":[{"start":{"date-parts":[[2017,12,23]],"date-time":"2017-12-23T00:00:00Z","timestamp":1513987200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-72971-8_6","type":"book-chapter","created":{"date-parts":[[2017,12,22]],"date-time":"2017-12-22T08:44:54Z","timestamp":1513932294000},"page":"114-135","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["A Scalable Analytical Memory Model for CPU Performance Prediction"],"prefix":"10.1007","author":[{"given":"Gopinath","family":"Chennupati","sequence":"first","affiliation":[]},{"given":"Nandakishore","family":"Santhi","sequence":"additional","affiliation":[]},{"given":"Robert","family":"Bird","sequence":"additional","affiliation":[]},{"given":"Sunil","family":"Thulasidasan","sequence":"additional","affiliation":[]},{"given":"Abdel-Hameed A.","family":"Badawy","sequence":"additional","affiliation":[]},{"given":"Satyajayant","family":"Misra","sequence":"additional","affiliation":[]},{"given":"Stephan","family":"Eidenbenz","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,12,23]]},"reference":[{"issue":"2","key":"6_CR1","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1145\/63404.63407","volume":"7","author":"A Agarwal","year":"1989","unstructured":"Agarwal, A., Hennessy, J., Horowitz, M.: An analytical cache model. ACM Trans. Comput. Syst. 7(2), 184\u2013215 (1989)","journal-title":"ACM Trans. Comput. Syst."},{"key":"6_CR2","unstructured":"Agner, F.: Instruction tables: lists of instruction latencies, throughputs and micro-operation breakdowns for intel, AMD and VIA CPUs. Technical University of Denmark, Copenhagen, Denmark (2016)"},{"issue":"2","key":"6_CR3","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1109\/2.982917","volume":"35","author":"T Austin","year":"2002","unstructured":"Austin, T., Larson, E., Ernst, D.: Simplescalar: an infrastructure for computer system modeling. Computer 35(2), 59\u201367 (2002)","journal-title":"Computer"},{"key":"6_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1007\/11549468_23","volume-title":"Euro-Par 2005 Parallel Processing","author":"DH Bailey","year":"2005","unstructured":"Bailey, D.H., Snavely, A.: Performance modeling: understanding the past and predicting the future. In: Cunha, J.C., Medeiros, P.D. (eds.) Euro-Par 2005. LNCS, vol. 3648, pp. 185\u2013195. Springer, Heidelberg (2005). https:\/\/doi.org\/10.1007\/11549468_23"},{"key":"6_CR5","first-page":"20","volume":"2004","author":"E Berg","year":"2004","unstructured":"Berg, E., Hagersten, E.: StatCache: a probabilistic approach to efficient and accurate data locality analysis. IEEE Int. Symp. ISPASS Perform. Anal. Syst. Softw. 2004, 20\u201327 (2004)","journal-title":"IEEE Int. Symp. ISPASS Perform. Anal. Syst. Softw."},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Bienia, C., Kumar, S., Singh, J.P., Li, K.: The parsec benchmark suite: characterization and architectural implications. In: Proceedings of the 17th International Conference on Parallel Architectures and Compilation Techniques, PACT 2008, New York, NY, USA, pp. 72\u201381. ACM (2008)","DOI":"10.1145\/1454115.1454128"},{"key":"6_CR7","unstructured":"Brehob, M., Enbody, R.: An analytical model of locality and caching. Technical report MSU-CSE-99-31 (1999)"},{"issue":"3","key":"6_CR8","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1177\/109434200001400303","volume":"14","author":"S Browne","year":"2000","unstructured":"Browne, S., Dongarra, J., Garner, N., Ho, G., Mucci, P.: A portable programming interface for performance evaluation on modern processors. Int. J. High Perform. Comput. Appl. 14(3), 189\u2013204 (2000)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Chatterjee, S., Parker, E., Hanlon, P.J., Lebeck, A.R.: Exact analysis of the cache behavior of nested loops. In: Proceedings of the ACM SIGPLAN 2001 Conference on Programming Language Design and Implementation, PLDI 2001, New York, NY, USA, pp. 286\u2013297. ACM (2001)","DOI":"10.1145\/378795.378859"},{"issue":"3","key":"6_CR10","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1145\/2425676.2425691","volume":"19","author":"JW Choi","year":"2013","unstructured":"Choi, J.W., Vuduc, R.W.: How much (execution) time and energy does my algorithm cost? XRDS 19(3), 49\u201351 (2013)","journal-title":"XRDS"},{"issue":"12","key":"6_CR11","first-page":"3537","volume":"65","author":"SV Steen den","year":"2016","unstructured":"den Steen, S.V., Eyerman, S., Pestel, S.D., Mechri, M., Carlson, T.E., Black-Schaffer, D., Hagersten, E., Eeckhout, L.: Analytical processor performance and power modeling using micro-architecture independent characteristics. IEEE Trans. Comput. 65(12), 3537\u20133551 (2016)","journal-title":"IEEE Trans. Comput."},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":"Ding, C., Zhong, Y.: Predicting whole-program locality through reuse distance analysis. In: Proceedings of the ACM SIGPLAN 2003 Conference on Programming Language Design and Implementation, PLDI 2003, pp. 245\u2013257. ACM (2003)","DOI":"10.1145\/781131.781159"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Eeckhout, L., de Bosschere, K., Neefs, H.: Performance analysis through synthetic trace generation. In: Proceedings of the IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS 2000, Washington, DC, USA, pp. 1\u20136. IEEE (2000)","DOI":"10.1109\/ISPASS.2000.842273"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Fang, C., Carr, S., \u00d6nder, S., Wang, Z.: Reuse-distance-based miss-rate prediction on a per instruction basis. In: Proceedings of the 2004 Workshop on Memory System Performance, MSP 2004, New York, NY, USA, pp. 60\u201368. ACM (2004)","DOI":"10.1145\/1065895.1065906"},{"key":"6_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/3-540-45545-0_15","volume-title":"Computational Science \u2014 ICCS 2001","author":"JA Gunnels","year":"2001","unstructured":"Gunnels, J.A., Henry, G.M., van de Geijn, R.A.: A family of high-performance matrix multiplication algorithms. In: Alexandrov, V.N., Dongarra, J.J., Juliano, B.A., Renner, R.S., Tan, C.J.K. (eds.) ICCS 2001. LNCS, vol. 2073, pp. 51\u201360. Springer, Heidelberg (2001). https:\/\/doi.org\/10.1007\/3-540-45545-0_15"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Hassan, R., Harris, A., Topham, N., Efthymiou, A.: Synthetic trace-driven simulation of cache memory. In: 21st International Conference on Advanced Information Networking and Applications Workshops, vol. 1 of AINAW 2007, pp. 764\u2013771 (2007)","DOI":"10.1109\/AINAW.2007.345"},{"key":"6_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1007\/11549468_24","volume-title":"Euro-Par 2005 Parallel Processing","author":"E Ipek","year":"2005","unstructured":"Ipek, E., de Supinski, B.R., Schulz, M., McKee, S.A.: An approach to performance prediction for parallel applications. In: Cunha, J.C., Medeiros, P.D. (eds.) Euro-Par 2005. LNCS, vol. 3648, pp. 196\u2013205. Springer, Heidelberg (2005). https:\/\/doi.org\/10.1007\/11549468_24"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"Ipek, E., McKee, S.A., Caruana, R., de Supinski, B.R., Schulz, M.: Efficiently exploring architectural design spaces via predictive modeling. In: Proceedings of the 12th International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS XII, New York, NY, USA, pp. 195\u2013206. ACM (2006)","DOI":"10.1145\/1168857.1168882"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Islam, T.Z., Thiagarajan, J.J., Bhatele, A., Schulz, M., Gamblin, T.: A machine learning framework for performance coverage analysis of proxy applications. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2016, Piscataway, NJ, USA, pp. 46:1\u201346:12. IEEE (2016)","DOI":"10.1109\/SC.2016.45"},{"key":"6_CR20","doi-asserted-by":"crossref","unstructured":"Jain, N., Bhatele, A., Robson, M.P., Gamblin, T., Kale, L.V.: Predicting application performance using supervised learning on communication features. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC 2013, New York, NY, USA, pp. 95:1\u201395:12. ACM (2013)","DOI":"10.1145\/2503210.2503263"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Lattner, C., Adve, V.: Llvm: a compilation framework for lifelong program analysis & transformation. In: Proceedings of the International Symposium on Code Generation and Optimization: Feedback-directed and Runtime Optimization, CGO 2004, Washington, DC, USA, pp. 75\u201387. IEEE (2004)","DOI":"10.1109\/CGO.2004.1281665"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Luk, C.-K., Cohn, R., Muth, R., Patil, H., Klauser, A., Lowney, G., Wallace, S., Reddi, V.J., Hazelwood, K.: Pin: building customized program analysis tools with dynamic instrumentation. In: Proceedings of the 2005 ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2005, New York, NY, USA, pp. 190\u2013200. ACM (2005)","DOI":"10.1145\/1065010.1065034"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Luszczek, P.R., Bailey, D.H., Dongarra, J.J., Kepner, J., Lucas, R.F., Rabenseifner, R., Takahashi, D.: The hpc challenge (hpcc) benchmark suite. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC 2006, New York, NY, USA. ACM (2006)","DOI":"10.1145\/1188455.1188677"},{"issue":"2","key":"6_CR24","doi-asserted-by":"crossref","first-page":"78","DOI":"10.1147\/sj.92.0078","volume":"9","author":"RL Mattson","year":"1970","unstructured":"Mattson, R.L., Gecsei, J., Slutz, D.R., Traiger, I.L.: Evaluation techniques for storage hierarchies. IBM Syst. J. 9(2), 78\u2013117 (1970)","journal-title":"IBM Syst. J."},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Nethercote, N., Seward, J.: Valgrind: a framework for heavyweight dynamic binary instrumentation. In: Proceedings of the 28th ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2007, New York, NY, USA, pp. 89\u2013100. ACM (2007)","DOI":"10.1145\/1250734.1250746"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Nguyen, A.T., Bose, P., Ekanadham, K., Nanda, A., Michael, M.: Accuracy and speed-up of parallel trace-driven architectural simulation. In: Proceedings 11th International Parallel Processing Symposium, pp. 39\u201344. IEEE (1997)","DOI":"10.1109\/IPPS.1997.580842"},{"issue":"23","key":"6_CR27","doi-asserted-by":"crossref","first-page":"3311","DOI":"10.1016\/S0042-6989(97)00169-7","volume":"37","author":"BA Olshausen","year":"1997","unstructured":"Olshausen, B.A., Field, D.J.: Sparse coding with an overcomplete basis set: a strategy employed by v1? Vis. Res. 37(23), 3311\u20133325 (1997)","journal-title":"Vis. Res."},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"Pakin, S., McCormick, p.: Hardware-independent application characterization. In: International Symposium on Workload Characterization (IISWC), Portland, Oregon, USA, pp. 111\u2013112. IEEE (2013)","DOI":"10.1109\/IISWC.2013.6704676"},{"key":"6_CR29","doi-asserted-by":"crossref","unstructured":"Rodrigues, A.F., Murphy, R.C., Kogge, P., Underwood, K.D.: The structural simulation toolkit: exploring novel architectures. In: Proceedings of the 2006 ACM\/IEEE Conference on Supercomputing, SC 2006, New York, NY, USA, p. 157. ACM (2006)","DOI":"10.1145\/1188455.1188618"},{"key":"6_CR30","doi-asserted-by":"crossref","unstructured":"Sahoo, S.K., Panuganti, R., Sadayappan, P., Krishnamoorthy, P.: Cache miss characterization and data locality optimization for imperfectly nested loops on shared memory multiprocessors. In: Proceeding of the 19th IEEE International Parallel and Distributed Processing Symposium, pp. 44\u201353 (2005)","DOI":"10.1109\/IPDPS.2005.134"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Santhi, N., Eidenbenz, S., Liu, J.: The simian concept: parallel discrete event simulation with interpreted languages and just-in-time compilation. In: Proceedings of the 2015 Winter Simulation Conference (WSC), pp. 3013\u20133024. IEEE (2015)","DOI":"10.1109\/WSC.2015.7408405"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Schuff, D.L., Kulkarni, M., Pai, V.S.: Accelerating multicore reuse distance analysis with sampling and parallelization. In: Proceedings of the 19th International Conference on Parallel Architectures and Compilation Techniques, PACT 2010, New York, NY, USA, pp. 53\u201364. ACM (2010)","DOI":"10.1145\/1854273.1854286"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Sherwood, T., Perelman, E., Hamerly, G., Calder, B.: Automatically characterizing large scale program behavior. In: Proceedings of the 10th International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS X, New York, NY, USA, pp. 45\u201357. ACM (2002)","DOI":"10.1145\/605397.605403"},{"key":"6_CR34","doi-asserted-by":"crossref","unstructured":"Snavely, A., Carrington, L., Wolter, N., Labarta, J., Badia, R., Purkayastha, A.: A framework for performance modeling and prediction. In: Proceedings of the 2002 ACM\/IEEE Conference on Supercomputing, SC 2002, Los Alamitos, CA, USA, pp. 1\u201317. IEEE (2002)","DOI":"10.1109\/SC.2002.10004"},{"key":"6_CR35","doi-asserted-by":"crossref","unstructured":"Weinberg, J., McCracken, M.O., Strohmaier, E., Snavely, A.: Quantifying locality in the memory access patterns of hpc applications. In: Proceedings of the 2005 ACM\/IEEE Conference on Supercomputing, SC 2005, Washington, DC, USA, pp. 50\u201361. IEEE (2005)","DOI":"10.1109\/SC.2005.59"},{"issue":"6","key":"6_CR36","doi-asserted-by":"crossref","first-page":"20:1","DOI":"10.1145\/1552309.1552310","volume":"31","author":"Y Zhong","year":"2009","unstructured":"Zhong, Y., Shen, X., Ding, C.: Program locality analysis using reuse distance. ACM Trans. Program. Lang. Syst. 31(6), 20:1\u201320:39 (2009)","journal-title":"ACM Trans. Program. Lang. Syst."}],"container-title":["Lecture Notes in Computer Science","High Performance Computing Systems. Performance Modeling, Benchmarking, and Simulation"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-72971-8_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,8]],"date-time":"2019-10-08T13:29:48Z","timestamp":1570541388000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-72971-8_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12,23]]},"ISBN":["9783319729701","9783319729718"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-72971-8_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017,12,23]]}}}