{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T07:26:46Z","timestamp":1743751606889,"version":"3.37.3"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319405261"},{"type":"electronic","value":"9783319405285"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-40528-5_20","type":"book-chapter","created":{"date-parts":[[2016,9,14]],"date-time":"2016-09-14T06:50:15Z","timestamp":1473835815000},"page":"445-465","source":"Crossref","is-referenced-by-count":6,"title":["Automatic Performance Modeling of HPC Applications"],"prefix":"10.1007","author":[{"given":"Felix","family":"Wolf","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Bischof","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandru","family":"Calotoiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Torsten","family":"Hoefler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Iwainsky","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Grzegorz","family":"Kwasniewski","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bernd","family":"Mohr","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergei","family":"Shudler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandre","family":"Strube","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andreas","family":"Vogel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gabriel","family":"Wittum","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,9,15]]},"reference":[{"issue":"6","key":"20_CR1","doi-asserted-by":"crossref","first-page":"685","DOI":"10.1002\/cpe.1553","volume":"22","author":"L. Adhianto","year":"2010","unstructured":"Adhianto, L., Banerjee, S., Fagan, M.W., Krentel, M.W., Marin, G., Mellor-Crummey, J., Tallent, N.R.: HPCToolkit: tools for performance analysis of optimized parallel programs. Concurr. Comput. Pract. Exper. 22 (6), 685\u2013701 (2010)","journal-title":"Concurr. Comput. Pract. Exper."},{"doi-asserted-by":"crossref","unstructured":"Bailey, D.H., Barszcz, E., Barton, J.T., Browning, D.S., Carter, R.L., Dagum, L., Fatoohi, R.A., Frederickson, P.O., Lasinski, T.A., Schreiber, R.S., Simon, H.D., Venkatakrishnan, V., Weeratunga, S.K.: The NAS parallel benchmarks\u2013summary and preliminary results. In: Proceedings of the 1991 ACM\/IEEE Conference on Supercomputing (SC), Albuquerque, pp. 158\u2013165. ACM (1991)","key":"20_CR2","DOI":"10.1145\/125826.125925"},{"doi-asserted-by":"crossref","unstructured":"Bauer, G., Gottlieb, S., Hoefler, T.: Performance modeling and comparative analysis of the MILC lattice QCD application su3_rmd. In: Proceedings of the CCGrid, Ottawa, pp. 652\u2013659. IEEE (2012)","key":"20_CR3","DOI":"10.1109\/CCGrid.2012.123"},{"key":"20_CR4","first-page":"19","volume":"38","author":"M. Behr","year":"2008","unstructured":"Behr, M., Nicolai, M., Probst, M.: Efficient parallel simulations in support of medical device design. NIC Ser. 38, 19\u201326 (2008)","journal-title":"NIC Ser."},{"doi-asserted-by":"crossref","unstructured":"Benabderrahmane, M.W., Pouchet, L.N., Cohen, A., Bastoul, C.: The polyhedral model is more widely applicable than you think. In: Gupta, R. (ed.) Compiler Construction. LNCS, vol. 6011, pp. 283\u2013303. Springer (2010). http:\/\/dx.doi.org\/10.1007\/978-3-642-11970-5_16","key":"20_CR5","DOI":"10.1007\/978-3-642-11970-5_16"},{"doi-asserted-by":"crossref","unstructured":"Bhattacharyya, A., Kwasniewski, G., Hoefler, T.: Using compiler techniques to improve automatic performance modeling. In: Accepted at the 24th International Conference on Parallel Architectures and Compilation (PACT\u201915), San Francisco. ACM (2015)","key":"20_CR6","DOI":"10.1109\/PACT.2015.39"},{"doi-asserted-by":"crossref","unstructured":"Bhattacharyya, A., Hoefler, T.: PEMOGEN: automatic adaptive performance modeling during program runtime. In: Proceedings of the 23rd International Conference on Parallel Architectures and Compilation Techniques (PACT\u201914). ACM, Edmonton (2014)","key":"20_CR7","DOI":"10.1145\/2628071.2628100"},{"key":"20_CR8","first-page":"103","volume":"6355","author":"R. Blanc","year":"2010","unstructured":"Blanc, R., Henzinger, T.A., Hottelier, T., Kovacs, L.: ABC: algebraic bound computation for loops. In: Clarke, E., Voronkov, A. (eds.) Logic for Programming, Artificial Intelligence, and Reasoning. LNCS, vol. 6355, pp. 103\u2013118 (2010). http:\/\/dx.doi.org\/10.1007\/978-3-642-17511-4_7","journal-title":"LNCS"},{"doi-asserted-by":"crossref","unstructured":"Bull, J.M., O\u2019Neill, D.: A microbenchmark suite for OpenMP 2.0. ACM Comput. Architech. News 29 (5), 41\u201348 (2001)","key":"20_CR9","DOI":"10.1145\/563647.563656"},{"doi-asserted-by":"crossref","unstructured":"Calotoiu, A., Hoefler, T., Poke, M., Wolf, F.: Using automated performance modeling to find scalability bugs in complex codes. In: Proceedings of the ACM\/IEEE Conference on Supercomputing (SC13), Denver, pp. 1\u201312. ACM (2013)","key":"20_CR10","DOI":"10.1145\/2503210.2503277"},{"issue":"3","key":"20_CR11","doi-asserted-by":"crossref","first-page":"336","DOI":"10.1016\/j.future.2004.11.019","volume":"22","author":"L. Carrington","year":"2006","unstructured":"Carrington, L., Snavely, A., Wolter, N.: A performance prediction framework for scientific applications. Future Gener. Comput. Syst. 22 (3), 336\u2013346 (2006). http:\/\/dx.doi.org\/10.1016\/j.future.2004.11.019","journal-title":"Future Gener. Comput. Syst."},{"issue":"13","key":"20_CR12","doi-asserted-by":"crossref","first-page":"1749","DOI":"10.1002\/cpe.1206","volume":"19","author":"E. Chan","year":"2007","unstructured":"Chan, E., Heimlich, M., Purkayastha, A., van de Geijn, R.: Collective communication: theory, practice, and experience. Concurr. Comput. Pract. Exp. 19 (13), 1749\u20131783 (2007)","journal-title":"Concurr. Comput. Pract. Exp."},{"issue":"1","key":"20_CR13","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1177\/1094342011428142","volume":"26","author":"J.M. Dennis","year":"2012","unstructured":"Dennis, J.M., Edwards, J., Evans, K.J., Guba, O., Lauritzen, P.H., Mirin, A.A., St-Cyr, A., Taylor, M.A., Worley, P.H.: CAM-SE: a scalable spectral element dynamical core for the community atmosphere model. Int. J. High Perform. Comput. 26 (1), 74\u201389 (2012). http:\/\/hpc.sagepub.com\/content\/26\/1\/74.abstract","journal-title":"Int. J. High Perform. Comput."},{"issue":"6","key":"20_CR14","doi-asserted-by":"crossref","first-page":"702","DOI":"10.1002\/cpe.1556","volume":"22","author":"M. Geimer","year":"2010","unstructured":"Geimer, M., Wolf, F., Wylie, B.J.N., \u00c1brah\u00e1m, E., Becker, D., Mohr, B.: The Scalasca performance toolset architecture. Concurr. Comput. Pract. Exp. 22 (6), 702\u2013719 (2010)","journal-title":"Concurr. Comput. Pract. Exp."},{"issue":"4","key":"20_CR15","doi-asserted-by":"crossref","first-page":"1430","DOI":"10.4249\/scholarpedia.1430","volume":"2","author":"M.O. Gewaltig","year":"2007","unstructured":"Gewaltig, M.O., Diesmann, M.: Nest (neural simulation tool). Scholarpedia J. 2 (4), 1430 (2007)","journal-title":"Scholarpedia J."},{"unstructured":"Goldsmith, S.F., Aiken, A.S., Wilkerson, D.S.: Measuring empirical computational complexity. In: Proceedings of the 6th Joint Meeting of the European Software Engineering Conference and the ACM SIGSOFT Symposium on the Foundations of Software Engineering (ESEC-FSE \u201907), New York, pp. 395\u2013404. ACM (2007). http:\/\/doi.acm.org\/10.1145\/1287624.1287681","key":"20_CR16"},{"unstructured":"Hammer, J., Hager, G., Eitzinger, J., Wellein, G.: Automatic loop kernel analysis and performance modeling with kerncraft. In: Proceedings of the 6th International Workshop on Performance Modeling, Benchmarking, and Simulation of High Performance Computing Systems (PMBS \u201915), New York, pp. 4:1\u20134:11. ACM (2015). http:\/\/doi.acm.org\/10.1145\/2832087.2832092","key":"20_CR17"},{"doi-asserted-by":"crossref","unstructured":"Hoefler, T., Kwasniewski, G.: Automatic complexity analysis of explicitly parallel programs. In: Proceedings of the 26th ACM Symposium on Parallelism in Algorithms and Architectures (SPAA\u201914), Prague. ACM (2014)","key":"20_CR18","DOI":"10.1145\/2612669.2612685"},{"doi-asserted-by":"crossref","unstructured":"Hoefler, T., Snir, M.: Performance engineering: a must for petaflops and beyond. In: Proceedings of the Workshop on Large-Scale System and Application Performance (LSAP), in Conjunction with HPDC, San Jose. ACM (2011)","key":"20_CR19","DOI":"10.1145\/1996029.1996031"},{"unstructured":"Hoefler, T., Gropp, W., Kramer, W., Snir, M.: Performance modeling for systematic performance tuning. In: State of the Practice Reports (SC \u201911), pp. 6:1\u20136:12. ACM (2011). http:\/\/doi.acm.org\/10.1145\/2063348.2063356","key":"20_CR20"},{"unstructured":"Hoefler, T., Kwasniewski, G.: Automatic complexity analysis of explicitly parallel programs. In: Proceedings of the 26th ACM Symposium on Parallelism in Algorithms and Architectures (SPAA \u201914), New York, pp. 226\u2013235. ACM (2014). http:\/\/doi.acm.org\/10.1145\/2612669.2612685","key":"20_CR21"},{"doi-asserted-by":"crossref","unstructured":"Iwainsky, C., Shudler, S., Calotoiu, A., Strube, A., Knobloch, M., Bischof, C., Wolf, F.: How many threads will be too many? On the scalability of OpenMP implementations. In: Proceedings of the 21st Euro-Par Conference, Vienna. LNCS, vol. 9233, pp. 451\u2013463. Springer (2015)","key":"20_CR22","DOI":"10.1007\/978-3-662-48096-0_35"},{"doi-asserted-by":"crossref","unstructured":"Jayakumar, A., Murali, P., Vadhiyar, S.: Matching application signatures for performance predictions using a single execution. In: 2015 IEEE International Parallel and Distributed Processing Symposium (IPDPS), Hyderabad, pp. 1161\u20131170. IEEE (2015)","key":"20_CR23","DOI":"10.1109\/IPDPS.2015.20"},{"unstructured":"JuBE \u2013 J\u00fclich Benchmarking Environment (2016). http:\/\/www.fz-juelich.de\/jsc\/jube","key":"20_CR24"},{"unstructured":"JuSPIC \u2013 J\u00fclich Scalable Particle-in-Cell Code (2016). http:\/\/www.fz-juelich.de\/ias\/jsc\/EN\/Expertise\/High-Q-Club\/JuSPIC\/_node.html","key":"20_CR25"},{"doi-asserted-by":"crossref","unstructured":"Kerbyson, D.J., Alme, H.J., Hoisie, A., Petrini, F., Wasserman, H.J., Gittings, M.: Predictive performance and scalability modeling of a large-scale application. In: Proceedings of the ACM\/IEEE Conference on Supercomputing (SC\u201901), Denver, p. 37. ACM (2001)","key":"20_CR26","DOI":"10.1145\/582034.582071"},{"unstructured":"LLVM home page (2016). http:\/\/llvm.org\/","key":"20_CR27"},{"doi-asserted-by":"crossref","unstructured":"Lo, Y.J., Williams, S., Van Straalen, B., Ligocki, T.J., Cordery, M.J., Wright, N.J., Hall, M.W., Oliker, L.: Roofline model toolkit: a practical tool for architectural and program analysis. In: High Performance Computing Systems. Performance Modeling, Benchmarking, and Simulation, New Orleans, pp. 129\u2013148. Springer (2014)","key":"20_CR28","DOI":"10.1007\/978-3-319-17248-4_7"},{"issue":"1","key":"20_CR29","doi-asserted-by":"crossref","first-page":"2","DOI":"10.1145\/1012888.1005691","volume":"32","author":"G. Marin","year":"2004","unstructured":"Marin, G., Mellor-Crummey, J.: Cross-architecture performance predictions for scientific applications using parameterized models. SIGMETRICS Perform. Eval. Rev. 32 (1), 2\u201313 (2004). http:\/\/doi.acm.org\/10.1145\/1012888.1005691","journal-title":"SIGMETRICS Perform. Eval. Rev."},{"unstructured":"MILC Code Version 7 (2016). http:\/\/www.physics.utah.edu\/~detar\/milc\/milc_qcd.html","key":"20_CR30"},{"doi-asserted-by":"crossref","unstructured":"Pllana, S., Brandic, I., Benkner, S.: Performance modeling and prediction of parallel and distributed computing systems: a survey of the state of the art. In: Proceedings of the 1st International Conference on Complex, Intelligent and Software Intensive Systems (CISIS), Vienna, pp. 279\u2013284. IEEE (2007)","key":"20_CR31","DOI":"10.1109\/CISIS.2007.49"},{"unstructured":"Shudler, S., Calotoiu, A., Hoefler, T., Strube, A., Wolf, F.: Exascaling your library: will your implementation meet your expectations? In: Proceedings of the 29th ACM on International Conference on Supercomputing (ICS \u201915), New York, pp. 165\u2013175. ACM (2015). http:\/\/doi.acm.org\/10.1145\/2751205.2751216","key":"20_CR32"},{"unstructured":"Siegmund, N., Grebhahn, A., Apel, S., K\u00e4stner, C.: Performance-influence models for highly configurable systems. In: Proceedings of the 2015-10th Joint Meeting on Foundations of Software Engineering (ESEC\/FSE 2015), New York, pp. 284\u2013294. ACM (2015). http:\/\/doi.acm.org\/10.1145\/2786805.2786845","key":"20_CR33"},{"unstructured":"Spafford, K.L., Vetter, J.S.: Aspen: a domain specific language for performance modeling. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis (SC \u201912), Los Alamitos, pp. 84:1\u201384:11. IEEE Computer Society Press (2012). http:\/\/dl.acm.org\/citation.cfm?id=2388996.2389110","key":"20_CR34"},{"key":"20_CR35","volume-title":"Particle based simulations of complex systems with mp2c: hydrodynamics and electrostatics","author":"G. Sutmann","year":"2010","unstructured":"Sutmann, G., Westphal, L., Bolten, M.: Particle based simulations of complex systems with mp2c: hydrodynamics and electrostatics. In: International Conference of Numerical Analysis and Applied Mathematics 2010 (ICNAAM 2010), Rhodes, vol. 1281, pp. 1768\u20131772. AIP Publishing (2010)"},{"unstructured":"Tallent, N.R., Hoisie, A.: Palm: easing the burden of analytical performance modeling. In: Proceedings of the 28th ACM International Conference on Supercomputing (ICS \u201914), NewYork, pp. 221\u2013230. ACM (2014). http:\/\/doi.acm.org\/10.1145\/2597652.2597683","key":"20_CR36"},{"issue":"1","key":"20_CR37","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1177\/1094342005051521","volume":"19","author":"R. Thakur","year":"2005","unstructured":"Thakur, R., Rabenseifner, R., Gropp, W.: Optimization of collective communication operations in mpich. Int. J. High Perform. Comput. 19 (1), 49\u201366 (2005)","journal-title":"Int. J. High Perform. Comput."},{"doi-asserted-by":"crossref","unstructured":"Vetter, J., Worley, P.: Asserting performance expectations. In: Proceedings of the ACM\/IEEE Conference on Supercomputing, Baltimore, pp. 1\u201313. ACM (2002)","key":"20_CR38","DOI":"10.1109\/SC.2002.10046"},{"issue":"4","key":"20_CR39","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1007\/s00791-014-0232-9","volume":"16","author":"A. Vogel","year":"2013","unstructured":"Vogel, A., Reiter, S., Rupp, M., N\u00e4gel, A., Wittum, G.: UG 4: a novel flexible software system for simulating PDE based models on high performance computers. Comput. Vis. Sci. 16 (4), 165\u2013179 (2013)","journal-title":"Comput. Vis. Sci."},{"doi-asserted-by":"crossref","unstructured":"Vogel, A., Calotoiu, A., Strube, A., Reiter, S., N\u00e4gel, A., Wolf, F., Wittum, G.: 10,000 performance models per minute \u2013 scalability of the ug4 simulation framework. In: Proceedings of the 21st Euro-Par Conference, Vienna. LNCS, vol. 9233, pp. 519\u2013531. Springer (2015)","key":"20_CR40","DOI":"10.1007\/978-3-662-48096-0_40"},{"doi-asserted-by":"crossref","unstructured":"V\u00f6mel, C.: ScaLAPACK\u2019s MRRR algorithm. ACM T. Math. Softw. 37 (1), 1:1\u20131:35 (2010)","key":"20_CR41","DOI":"10.1145\/1644001.1644002"},{"issue":"1","key":"20_CR42","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1177\/1094342004041293","volume":"18","author":"R. Vuduc","year":"2004","unstructured":"Vuduc, R., Demmel, J.W., Bilmes, J.A.: Statistical models for empirical search-based performance tuning. Int. J. High Perform. Comput. 18 (1), 65\u201394 (2004). http:\/\/dx.doi.org\/10.1177\/1094342004041293","journal-title":"Int. J. High Perform. Comput."},{"key":"20_CR43","doi-asserted-by":"crossref","first-page":"330","DOI":"10.1177\/109434200001400405","volume":"14","author":"H. Wasserman","year":"2000","unstructured":"Wasserman, H., Hoisie, A., Lubeck, O., Lubeck, O.: Performance and scalability analysis of teraflop-scale parallel architectures using multidimensional wavefront applications. Int. J. High Perform. Comput. 14, 330\u2013346 (2000)","journal-title":"Int. J. High Perform. Comput."},{"issue":"1","key":"20_CR44","first-page":"113","volume":"34","author":"X. Wu","year":"2012","unstructured":"Wu, X., M\u00fcller, F.: Scalaextrap: trace-based communication extrapolation for SPMD programs. ACM T. Lang. Sys. 34 (1), 113\u2013122 (2012)","journal-title":"ACM T. Lang. Sys."},{"issue":"4","key":"20_CR45","doi-asserted-by":"crossref","first-page":"397","DOI":"10.1142\/S0129626410000314","volume":"20","author":"B.J.N. Wylie","year":"2010","unstructured":"Wylie, B.J.N., Geimer, M., Mohr, B., B\u00f6hme, D., Szebenyi, Z., Wolf, F.: Large-scale performance analysis of Sweep3D with the Scalasca toolset. Parallel Process. Lett. 20 (4), 397\u2013414 (2010)","journal-title":"Parallel Process. Lett."},{"issue":"6","key":"20_CR46","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1145\/2345156.2254074","volume":"47","author":"D. Zaparanuks","year":"2012","unstructured":"Zaparanuks, D., Hauswirth, M.: Algorithmic profiling. Sigplan Not. 47 (6), 67\u201376 (2012). http:\/\/doi.acm.org\/10.1145\/2345156.2254074","journal-title":"Sigplan Not."},{"issue":"5","key":"20_CR47","doi-asserted-by":"crossref","first-page":"305","DOI":"10.1145\/1837853.1693493","volume":"45","author":"J. Zhai","year":"2010","unstructured":"Zhai, J., Chen, W., Zheng, W.: Phantom: predicting performance of parallel applications on large-scale parallel machines using a single node. Sigplan Not. 45 (5), 305\u2013314 (2010). http:\/\/doi.acm.org\/10.1145\/1837853.1693493","journal-title":"Sigplan Not."}],"container-title":["Lecture Notes in Computational Science and Engineering","Software for Exascale Computing - SPPEXA 2013-2015"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-40528-5_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,19]],"date-time":"2024-06-19T10:23:32Z","timestamp":1718792612000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-40528-5_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319405261","9783319405285"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-40528-5_20","relation":{},"ISSN":["1439-7358","2197-7100"],"issn-type":[{"type":"print","value":"1439-7358"},{"type":"electronic","value":"2197-7100"}],"subject":[],"published":{"date-parts":[[2016]]}}}