{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T04:54:05Z","timestamp":1755838445219},"publisher-location":"Berlin, Heidelberg","reference-count":24,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662480953"},{"type":"electronic","value":"9783662480960"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-662-48096-0_35","type":"book-chapter","created":{"date-parts":[[2015,7,24]],"date-time":"2015-07-24T06:16:03Z","timestamp":1437718563000},"page":"451-463","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["How Many Threads will be too Many? On the Scalability of OpenMP Implementations"],"prefix":"10.1007","author":[{"given":"Christian","family":"Iwainsky","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergei","family":"Shudler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandru","family":"Calotoiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexandre","family":"Strube","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Michael","family":"Knobloch","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christian","family":"Bischof","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Felix","family":"Wolf","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,7,25]]},"reference":[{"key":"35_CR1","unstructured":"Stevens, R., et al.: Architectures and Technology for Extreme Scale Computing. Technical report, ASCR Scientific Grand Challenges Workshop Series, December 2009"},{"key":"35_CR2","doi-asserted-by":"crossref","unstructured":"Calotoiu, A., Hoefler, T., Poke, M., Wolf, F.: Using automated performance modeling to find scalability bugs in complex codes. In: Proceedings of the ACM\/IEEE Conference on Supercomputing (SC 2013), p. 45 (2013)","DOI":"10.1145\/2503210.2503277"},{"key":"35_CR3","unstructured":"Bull, J.M.: Measuring synchronisation and scheduling overheads in OpenMP. In: Proceedings of First European Workshop on OpenMP, pp. 99\u2013105 (1999)"},{"issue":"5","key":"35_CR4","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1145\/563647.563656","volume":"29","author":"JM Bull","year":"2001","unstructured":"Bull, J.M., O\u2019Neill, D.: A microbenchmark suite for OpenMP 2.0. ACM SIGARCH Comput. Archit. News 29(5), 41\u201348 (2001)","journal-title":"ACM SIGARCH Comput. Archit. News"},{"issue":"4","key":"35_CR5","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52(4), 65\u201376 (2009)","journal-title":"Commun. ACM"},{"issue":"387","key":"35_CR6","doi-asserted-by":"publisher","first-page":"575","DOI":"10.1080\/01621459.1984.10478083","volume":"79","author":"RR Picard","year":"1984","unstructured":"Picard, R.R., Cook, R.D.: Cross-validation of regression models. J. Am. Stat. Assoc. 79(387), 575\u2013583 (1984)","journal-title":"J. Am. Stat. Assoc."},{"key":"35_CR7","doi-asserted-by":"crossref","unstructured":"Hoefler, T., Schneider, T., Lumsdaine, A.: Accurately measuring collective operations at massive scale. In: Proceedings of the IEEE International Parallel & Distributed Processing Symposium, IPDPS 2008, pp. 1\u20138 (2008)","DOI":"10.1109\/IPDPS.2008.4536494"},{"issue":"10","key":"35_CR8","doi-asserted-by":"publisher","first-page":"1482","DOI":"10.1109\/26.103043","volume":"39","author":"DL Mills","year":"1991","unstructured":"Mills, D.L.: Internet time synchronization: the Network Time Protocol. IEEE Trans. Commun. 39(10), 1482\u20131493 (1991)","journal-title":"IEEE Trans. Commun."},{"key":"35_CR9","doi-asserted-by":"crossref","unstructured":"Weyers, B., Terboven, C., Schmidl, D., Herber, J., Kuhlen, T.W., M\u00fcller, M.S., Hentschel, B.: Visualization of memory access behavior on hierarchical NUMA architectures. In: Proceedings of the First Workshop on Visual Performance Analysis, VPA 2014, Piscataway, NJ, USA, pp. 42\u201349. IEEE Press (2014)","DOI":"10.1109\/VPA.2014.12"},{"key":"35_CR10","doi-asserted-by":"crossref","unstructured":"Mathis, M.M., Amato, N.M., Adams, M.L.: A general performance model for parallel sweeps on orthogonal grids for particle transport calculations. Technical report, College Station, TX, USA (2000)","DOI":"10.1145\/335231.335256"},{"key":"35_CR11","doi-asserted-by":"crossref","unstructured":"Pllana, S., Brandic, I., Benkner, S.: Performance modeling and prediction of parallel and distributed computing systems: a survey of the state of the art. In: Proceedings of the 1st International Conference on Complex, Intelligent and Software Intensive Systems (CISIS), pp. 279\u2013284 (2007)","DOI":"10.1109\/CISIS.2007.49"},{"key":"35_CR12","doi-asserted-by":"crossref","unstructured":"Petrini, F., Kerbyson, D.J., Pakin, S.: The case of the missing supercomputer performance: achieving optimal performance on the 8,192 processors of ASCI Q. In: Proceedings of the ACM\/IEEE Conference on Supercomputing (SC 2003), p. 55 (2003)","DOI":"10.1145\/1048935.1050204"},{"key":"35_CR13","doi-asserted-by":"crossref","unstructured":"Tallent, N.R., Hoisie, A.: Palm: easing the burden of analytical performance modeling. In: Proceedings of the International Conference on Supercomputing (ICS), pp. 221\u2013230 (2014)","DOI":"10.1145\/2597652.2597683"},{"key":"35_CR14","doi-asserted-by":"crossref","unstructured":"Spafford, K.L., Vetter, J.S.: Aspen: a domain specific language for performance modeling. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis. SC 2012, Los Alamitos, CA, USA, pp. 84:1\u201384:11. IEEE Computer Society Press (2012)","DOI":"10.1109\/SC.2012.20"},{"key":"35_CR15","doi-asserted-by":"crossref","unstructured":"Lee, B.C., Brooks, D.M., de Supinski, B.R., Schulz, M., Singh, K., McKee, S.A.: Methods of inference and learning for performance modeling of parallel applications. In: Proceedings of the 12th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP 2007), pp. 249\u2013258 (2007)","DOI":"10.1145\/1229428.1229479"},{"issue":"5","key":"35_CR16","doi-asserted-by":"publisher","first-page":"305","DOI":"10.1145\/1837853.1693493","volume":"45","author":"J Zhai","year":"2010","unstructured":"Zhai, J., Chen, W., Zheng, W.: PHANTOM: predicting performance of parallel applications on large-scale parallel machines using a single node. SIGPLAN Not. 45(5), 305\u2013314 (2010)","journal-title":"SIGPLAN Not."},{"key":"35_CR17","doi-asserted-by":"crossref","unstructured":"Wu, X., Mueller, F.: ScalaExtrap: trace-based communication extrapolation for SPMD programs. In: Proceedings of the 16th ACM Symposium on Principles and Practice of Parallel Programming (PPoPP 2011), pp. 113\u2013122 (2011)","DOI":"10.1145\/2038037.1941569"},{"issue":"4","key":"35_CR18","doi-asserted-by":"publisher","first-page":"1340008","DOI":"10.1142\/S0129626413400082","volume":"23","author":"L Carrington","year":"2013","unstructured":"Carrington, L., Laurenzano, M., Tiwari, A.: Characterizing large-scale HPC applications through trace extrapolation. Parallel Process. Lett. 23(4), 1340008 (2013). doi:\n                      10.1142\/S0129626413400082","journal-title":"Parallel Process. Lett."},{"key":"35_CR19","doi-asserted-by":"crossref","unstructured":"Fredrickson, N.R., Afsahi, A., Qian, Y.: Performance characteristics of OpenMP constructs, and application benchmarks on a large symmetric multiprocessor. In: Proceedings of the 17th Annual International Conference on Supercomputing, pp. 140\u2013149. ACM (2003)","DOI":"10.1145\/782814.782835"},{"key":"35_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/978-3-540-71351-7_4","volume-title":"High Performance Computing for Computational Science - VECPAR 2006","author":"K F\u00fcrlinger","year":"2007","unstructured":"F\u00fcrlinger, K., Gerndt, M.: Analyzing overheads and scalability characteristics of OpenMP applications. In: Dayd\u00e9, M., Palma, J.M.L.M., Coutinho, A.L.G.A., Pacitti, E., Lopes, J.C. (eds.) VECPAR 2006. LNCS, vol. 4395, pp. 39\u201351. Springer, Heidelberg (2007)"},{"key":"35_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"178","DOI":"10.1007\/978-3-540-68555-5_15","volume-title":"OpenMP Shared Memory Parallel Programming","author":"C Liao","year":"2008","unstructured":"Liao, C., Liu, Z., Huang, L., Chapman, B.: Evaluating OpenMP on chip multithreading platforms. In: Mueller, M.S., Chapman, B.M., de Supinski, B.R., Malony, A.D., Voss, M. (eds.) IWOMP 2005\/2006. LNCS, vol. 4315, pp. 178\u2013190. Springer, Heidelberg (2008)"},{"key":"35_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1007\/978-3-540-79561-2_2","volume-title":"OpenMP in a New Era of Parallelism","author":"G Bronevetsky","year":"2008","unstructured":"Bronevetsky, G., Gyllenhaal, J., de Supinski, B.R.: CLOMP: accurately characterizing OpenMP application overheads. In: Eigenmann, R., de Supinski, B.R. (eds.) IWOMP 2008. LNCS, vol. 5004, pp. 13\u201325. Springer, Heidelberg (2008)"},{"key":"35_CR23","unstructured":"Cramer, T., Schmidl, D., Klemm, M., an Mey, D.: OpenMP programming on Intel Xeon Phi coprocessors: an early performance comparison. In: Proceedings of the Many-core Applications Research Community (MARC) Symposium at RWTH Aachen University, pp. 38\u201344, November 2012"},{"issue":"1\/2","key":"35_CR24","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1147\/JRD.2012.2228769","volume":"57","author":"AE Eichenberger","year":"2013","unstructured":"Eichenberger, A.E., O\u2019Brien, K.: Experimenting with low-overhead OpenMP runtime on IBM Blue Gene\/Q. IBM J. Res. Dev. 57(1\/2), 8\u20131 (2013)","journal-title":"IBM J. Res. Dev."}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2015: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-48096-0_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,20]],"date-time":"2020-04-20T00:34:46Z","timestamp":1587342886000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-662-48096-0_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783662480953","9783662480960"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-48096-0_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"25 July 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}