{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T05:24:40Z","timestamp":1743139480693,"version":"3.40.3"},"publisher-location":"Cham","reference-count":18,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319676296"},{"type":"electronic","value":"9783319676302"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-67630-2_34","type":"book-chapter","created":{"date-parts":[[2017,10,19]],"date-time":"2017-10-19T04:33:17Z","timestamp":1508387597000},"page":"457-476","source":"Crossref","is-referenced-by-count":1,"title":["Analyzing Offloading Inefficiencies in Scalable Heterogeneous Applications"],"prefix":"10.1007","author":[{"given":"Robert","family":"Dietrich","sequence":"first","affiliation":[]},{"given":"Ronny","family":"Tsch\u00fcter","sequence":"additional","affiliation":[]},{"given":"Guido","family":"Juckeland","sequence":"additional","affiliation":[]},{"given":"Andreas","family":"Kn\u00fcpfer","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,10,20]]},"reference":[{"key":"34_CR1","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1016\/j.softx.2015.06.001","volume":"1\u20132","author":"MJ Abraham","year":"2015","unstructured":"Abraham, M.J., Murtola, T., Schulz, R., Pll, S., Smith, J.C., Hess, B., Lindahl, E.: Gromacs: high performance molecular simulations through multi-level parallelism from laptops to supercomputers. SoftwareX 1\u20132, 19\u201325 (2015). doi: 10.1016\/j.softx.2015.06.001","journal-title":"SoftwareX"},{"issue":"6","key":"34_CR2","doi-asserted-by":"crossref","first-page":"685","DOI":"10.1002\/cpe.1553","volume":"22","author":"L Adhianto","year":"2010","unstructured":"Adhianto, L., Banerjee, S., Fagan, M., Krentel, M., Marin, G., Mellor-Crummey, J., Tallent, N.R.: HPCTOOLKIT: tools for performance analysis of optimized parallel programs. Concurrency Comput. Pract. Exp. 22(6), 685\u2013701 (2010)","journal-title":"Concurrency Comput. Pract. Exp."},{"key":"34_CR3","unstructured":"All members of the OpenMP Language Working Group: OpenMP Technical report 4: Version 5.0 Preview 1. OpenMP Architecture Review Board (2016)"},{"key":"34_CR4","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1007\/978-3-642-24025-6_8","volume-title":"Competence in High Performance Computing 2010","author":"D An Mey","year":"2012","unstructured":"An Mey, D., et al.: Score-P: a unified performance measurement system for petascale applications. In: Bischof, C., Hegering, H.G., Nagel, W.E., Wittum, G. (eds.) Competence in High Performance Computing 2010, pp. 85\u201397. Springer, Heidelberg (2012). doi: 10.1007\/978-3-642-24025-6_8"},{"key":"34_CR5","doi-asserted-by":"crossref","unstructured":"B\u00f6hme, D., Geimer, M., Wolf, F., Arnold, L.: Identifying the root causes of wait states in large-scale parallel applications. In: 39th International Conference on Parallel Processing, ICPP, pp. 90\u2013100. IEEE (2010)","DOI":"10.1109\/ICPP.2010.18"},{"key":"34_CR6","doi-asserted-by":"crossref","unstructured":"B\u00f6hme, D., Wolf, F., de Supinski, B.R., Schulz, M., Geimer, M.: Scalable critical-path based performance analysis. In: 26th International Parallel Distributed Processing Symposium, IPDPS, pp. 1330\u20131340. IEEE (2012)","DOI":"10.1109\/IPDPS.2012.120"},{"key":"34_CR7","doi-asserted-by":"crossref","unstructured":"Chabbi, M., Murthy, K., Fagan, M., Mellor-Crummey, J.: Effective sampling-driven performance tools for GPU-accelerated supercomputers. In: International Conference on High Performance Computing, Networking, Storage and Analysis, SC 2013, pp. 43:1\u201343:12. ACM (2013)","DOI":"10.1145\/2503210.2503299"},{"key":"34_CR8","doi-asserted-by":"crossref","unstructured":"Dietrich, R., Juckeland, G., Wolfe, M.: OpenACC programs examined: a performance analysis approach. In: 44th International Conference on Parallel Processing, ICPP. IEEE (2015)","DOI":"10.1109\/ICPP.2015.40"},{"key":"34_CR9","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1016\/j.jss.2015.12.050","volume":"125","author":"R Dietrich","year":"2016","unstructured":"Dietrich, R., Schmitt, F., Grund, A., Stolle, J.: Critical-blame analysis for OpenMP 4.0 offloading on Intel Xeon Phi. J. Syst. Softw. 125, 381\u2013388 (2016). doi: 10.1016\/j.jss.2015.12.050","journal-title":"J. Syst. Softw."},{"key":"34_CR10","doi-asserted-by":"crossref","unstructured":"Dietrich, R., Tsch\u00fcter, R.: A generic infrastructure for opencl performance analysis. In: 8th International Conference on Intelligent Data Acquisition and Advanced Computing Systems, Technology and Applications. IEEE (2015)","DOI":"10.1109\/IDAACS.2015.7340754"},{"key":"34_CR11","doi-asserted-by":"crossref","unstructured":"Eschweiler, D., Becker, D., Wolf, F.: Patterns of inefficient performance behavior in GPU applications. In: 19th International Euromicro Conference on Parallel, Distributed and Network-Based Processing, PDP 2011, pp. 262\u2013266. IEEE Computer Society (2011)","DOI":"10.1109\/PDP.2011.84"},{"issue":"6","key":"34_CR12","doi-asserted-by":"crossref","first-page":"702","DOI":"10.1002\/cpe.1556","volume":"22","author":"M Geimer","year":"2010","unstructured":"Geimer, M., Wolf, F., Wylie, B.J.N., Erika Abraham, D.B., Mohr, B.: The scalasca performance toolset architecture. Concurrency Comput. Pract. Exp. 22(6), 702\u2013719 (2010)","journal-title":"Concurrency Comput. Pract. Exp."},{"key":"34_CR13","doi-asserted-by":"crossref","unstructured":"Herdman, J.A., et al.: Accelerating hydrocodes with OpenACC, OpenCL and CUDA. In: SC Companion: High Performance Computing, Networking Storage and Analysis, pp. 465\u2013471 (2012)","DOI":"10.1109\/SC.Companion.2012.66"},{"key":"34_CR14","doi-asserted-by":"crossref","unstructured":"Karlin, I., Keasler, J., Neely, R.: Lulesh 2.0 updates and changes. Technical report LLNL-TR-641973 (2013)","DOI":"10.2172\/1090032"},{"key":"34_CR15","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/978-3-540-68564-7_9","volume-title":"Tools for High Performance Computing","author":"A Kn\u00fcpfer","year":"2008","unstructured":"Kn\u00fcpfer, A., et al.: The Vampir performance analysis tool-set. In: Resch, M., Keller, R., Himmler, V., Krammer, B., Schulz, A. (eds.) Tools for High Performance Computing, pp. 139\u2013155. Springer, Heidelberg (2008). doi: 10.1007\/978-3-540-68564-7_9"},{"key":"34_CR16","doi-asserted-by":"crossref","unstructured":"Schmitt, F., Dietrich, R., Juckeland, G.: Scalable critical-path analysis and optimization guidance for hybrid MPI-CUDA applications. Int. J. High Perform. Comput. Appl. (2016)","DOI":"10.1177\/1094342016661865"},{"key":"34_CR17","doi-asserted-by":"crossref","unstructured":"Schmitt, F., Stolle, J., Dietrich, R.: CASITA: a tool for identifying critical optimization targets in distributed heterogeneous applications. In: 43rd International Conference on Parallel Processing Workshops, ICPPW. IEEE (2014)","DOI":"10.1109\/ICPPW.2014.35"},{"issue":"11","key":"34_CR18","doi-asserted-by":"crossref","first-page":"1481","DOI":"10.1002\/cpe.1128","volume":"19","author":"F Wolf","year":"2007","unstructured":"Wolf, F., Mohr, B., Dongarra, J., Moore, S.: Automatic analysis of inefficiency patterns in parallel applications. Concurrency Comput. Pract. Exp. 19(11), 1481\u20131496 (2007)","journal-title":"Concurrency Comput. Pract. Exp."}],"container-title":["Lecture Notes in Computer Science","High Performance Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-67630-2_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,27]],"date-time":"2024-06-27T21:36:54Z","timestamp":1719524214000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-67630-2_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319676296","9783319676302"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-67630-2_34","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}