{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T04:08:08Z","timestamp":1759032488206,"version":"3.37.3"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2016,8,19]],"date-time":"2016-08-19T00:00:00Z","timestamp":1471564800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100004837","name":"Ministerio de Ciencia e Innovaci\u00f3n","doi-asserted-by":"publisher","award":["TIN2014-53234-C2-1-R"],"award-info":[{"award-number":["TIN2014-53234-C2-1-R"]}],"id":[{"id":"10.13039\/501100004837","id-type":"DOI","asserted-by":"publisher"}]},{"name":"GenCat-DIUiE(GRR)","award":["2014-SGR-576"],"award-info":[{"award-number":["2014-SGR-576"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2016,9]]},"DOI":"10.1007\/s10586-016-0615-4","type":"journal-article","created":{"date-parts":[[2016,8,19]],"date-time":"2016-08-19T09:15:34Z","timestamp":1471598134000},"page":"1105-1137","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Automated and dynamic abstraction of MPI application 
performance"],"prefix":"10.1007","volume":"19","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0090-4109","authenticated-orcid":false,"given":"Anna","family":"Sikora","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tom\u00e0s","family":"Margalef","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Josep","family":"Jorba","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,8,19]]},"reference":[{"key":"615_CR1","unstructured":"Unwind Library Project. http:\/\/www.nongnu.org\/libunwind\/ (2012). Accessed Apr 2016"},{"key":"615_CR2","unstructured":"Bailey, D.H., Harris, T., Saphir, W., der Wigngaart, R.V., Woo, A., Yarrow, M.: The NAS parallel benchmarks 2.0. Tech. Rep., NASA Ames Research Center, Report NAS-95-020 (1995)"},{"key":"615_CR3","unstructured":"Barcelona Supercomputing Center: MareNostrum system architecture. http:\/\/www.bsc.es\/marenostrum-support-services\/marenostrum-system-architecture (2014). Accessed Apr 2016"},{"key":"615_CR4","unstructured":"Barszcz, E., Fatoohi, R., Venkatakrishnan, V., Weeratunga, S.: Solution of regular, sparse triangular linear systems on vector and distributed-memory multiprocessors. Tech. Rep., NASA Ames Research Center, Technical Report NAS RNR-93-007 (1993)"},{"issue":"11","key":"615_CR5","doi-asserted-by":"crossref","first-page":"1533","DOI":"10.1002\/cpe.1125","volume":"19","author":"AR Bernat","year":"2007","unstructured":"Bernat, A.R., Miller, B.P.: Incremental call-path profiling. Concurrency 19(11), 1533\u20131547 (2007)","journal-title":"Concurrency"},{"key":"615_CR6","doi-asserted-by":"crossref","unstructured":"Brandes, U., Eiglsperger, M., Herman, I., Himsolt, M., Marshall, M.S., Leipert, S., Mutzel, P., Junger, M.: GraphML progress report, structural layer proposal. 
In: Graph Drawing\u20148th International Symposium, GD 2000, Colonial Williamsburg, VA, USA, pp. 501\u2013512 (2001)","DOI":"10.1007\/3-540-45848-4_59"},{"issue":"5","key":"615_CR7","doi-asserted-by":"crossref","first-page":"1823","DOI":"10.1137\/S1064827598339141","volume":"21","author":"PN Brown","year":"1999","unstructured":"Brown, P.N., Falgout, R.D., Jones, J.E.: Semicoarsening multigrid on distributed memory machines. SIAM J. Sci. Comput. 21(5), 1823\u20131834 (1999)","journal-title":"SIAM J. Sci. Comput."},{"key":"615_CR8","unstructured":"Preiss, B.R.: Bucket sort. http:\/\/www.brpreiss.com\/books\/opus5\/html\/page512.html (1998). Accessed Apr 2016"},{"issue":"4","key":"615_CR9","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1177\/109434200001400404","volume":"14","author":"B Buck","year":"2000","unstructured":"Buck, B., Hollingsworth, J.K.: An API for runtime code patching. Int. J. High Perform. Comput. Appl. 14(4), 317\u2013329 (2000)","journal-title":"Int. J. High Perform. Comput. Appl."},{"issue":"4","key":"615_CR10","doi-asserted-by":"crossref","first-page":"491","DOI":"10.1145\/115372.115324","volume":"13","author":"JD Choi","year":"1991","unstructured":"Choi, J.D., Miller, B.P., Netzer, R.H.B.: Techniques for debugging parallel programs with flowback analysis. ACM Trans. Program. Lang. Syst. 13(4), 491\u2013530 (1991)","journal-title":"ACM Trans. Program. Lang. Syst."},{"key":"615_CR11","doi-asserted-by":"crossref","unstructured":"Fox, G.C., Johnson, M.A., Lyzenga, G.A., Otto, S.W., Salmon, J.K., Walker, D.W.: Solving problems on concurrent processors. In: General Techniques and Regular Problems, vol. 1. 
Prentice-Hall, Inc., Upper Saddle River (1988)","DOI":"10.1063\/1.4822815"},{"issue":"6","key":"615_CR12","doi-asserted-by":"crossref","first-page":"702","DOI":"10.1002\/cpe.1556","volume":"22","author":"M Geimer","year":"2010","unstructured":"Geimer, M., Wolf, F., Wylie, B.J.N., \u00c1brah\u00e1m, E., Becker, D., Mohr, B.: The Scalasca performance toolset architecture. Concurr. Comput. 22(6), 702\u2013719 (2010)","journal-title":"Concurr. Comput."},{"issue":"6","key":"615_CR13","doi-asserted-by":"crossref","first-page":"736","DOI":"10.1002\/cpe.1551","volume":"22","author":"M Gerndt","year":"2010","unstructured":"Gerndt, M., Ott, M.: Automatic performance analysis with periscope. Concurr. Comput. 22(6), 736\u2013748 (2010)","journal-title":"Concurr. Comput."},{"key":"615_CR14","doi-asserted-by":"crossref","unstructured":"Gim\u00e9nez, J., Labarta, J., Pegenaute, F., Wen, H., Klepacki, D., Chung, I.H., Cong, G., Voigtl\u00e4nder, F., Mohr, B.: Guided performance analysis combining profile and trace tools. In: Euro-Par 2010 Parallel Processing Workshops, vol. 6586, pp. 513\u2013521 (2011)","DOI":"10.1007\/978-3-642-21878-1_63"},{"key":"615_CR15","doi-asserted-by":"crossref","unstructured":"Hafeez, M., Asghar, S., Malik, U.A., ur\u00a0Rehman, A., Riaz, N.: Survey of MPI implementations. In: DICTAP (2), Communications in Computer and Information Science, vol. 167, pp. 206\u2013220. Springer (2011)","DOI":"10.1007\/978-3-642-22027-2_18"},{"key":"615_CR16","doi-asserted-by":"crossref","unstructured":"Harris, T., Marlow, S., Peyton-Jones, S., Herlihy, M.: Composable memory transactions. In: Proceedings of the Tenth ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming. PPoPP \u201905, pp. 48\u201360. ACM, New York (2005)","DOI":"10.1145\/1065944.1065952"},{"key":"615_CR17","unstructured":"Vetter, J., Chambreau, C.: mpiP: lightweight, scalable MPI profiling. http:\/\/mpip.sourceforge.net\/ (2013). 
Accessed Apr 2016"},{"key":"615_CR18","unstructured":"Jorba, J., Margalef, T., Luque, E.: Performance analysis of parallel applications with KappaPI 2. In: PARCO, pp. 155\u2013162 (2005)"},{"key":"615_CR19","unstructured":"Jorba, J., Margalef, T., Luque, E., Andre, J.C.S., Viegas, D.X.: Application of parallel computing to the simulation of forest fire propagation. In: Proceedings of the 3rd International Conference in Forest Fire Propagation, vol.\u00a01, pp. 891\u2013900 (1998)"},{"key":"615_CR20","doi-asserted-by":"crossref","unstructured":"Kn\u00fcpfer, A., Brunst, H., Doleschal, J., Jurenz, M., Lieber, M., Mickler, H., M\u00fcller, M.S., Nagel, W.E.: The vampir performance analysis tool-set. In: Tools for High Performance Computing\u2014Proceedings of the 2nd International Workshop on Parallel Tools for High Performance Computing, July 2008, HLRS, Stuttgart, pp. 139\u2013155. Springer (2008)","DOI":"10.1007\/978-3-540-68564-7_9"},{"key":"615_CR21","doi-asserted-by":"crossref","unstructured":"Kranzlm\u00fcller, D., Schaubschl\u00e4ger, C., Volkert, J.: An integrated record & replay mechanism for nondeterministic message passing programs. In: PVM\/MPI, pp. 192\u2013200 (2001)","DOI":"10.1007\/3-540-45417-9_28"},{"issue":"7","key":"615_CR22","doi-asserted-by":"crossref","first-page":"558","DOI":"10.1145\/359545.359563","volume":"21","author":"L Lamport","year":"1978","unstructured":"Lamport, L.: Time, clocks, and the ordering of events in a distributed system. Commun. ACM 21(7), 558\u2013565 (1978)","journal-title":"Commun. ACM"},{"key":"615_CR23","unstructured":"Lawrence Livermore National Laboratory: SMG 2000 benchmark. http:\/\/asc.llnl.gov\/computing_resources\/purple\/archive\/benchmarks\/smg\/smg2000_readme.html (2001). Accessed Apr 2016"},{"key":"615_CR24","unstructured":"Lawrence Livermore National Laboratory: Scalable linear solvers, hypre library. http:\/\/computation.llnl.gov\/casc\/linear_solvers\/sls_hypre.html (2013). 
Accessed Apr 2016"},{"key":"615_CR25","doi-asserted-by":"crossref","unstructured":"Lee, B., Resnick, K., Bond, M.D., McKinley, K.S.: Correcting the dynamic call graph using control-flow constraints. In: International Conference on Compiler Construction, pp. 80\u201395 (2007)","DOI":"10.1007\/978-3-540-71229-9_6"},{"key":"615_CR26","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1007\/978-3-642-37349-7_8","volume-title":"Tools for High Performance Computing 2012","author":"D Lorenz","year":"2013","unstructured":"Lorenz, D., B\u00f6hme, D., Mohr, B., Strube, A., Szebenyi, Z.: Extending scalascas analysis features. In: Cheptsov, A., Brinkmann, S., Gracia, J., Resch, M.M., Nagel, W.E. (eds.) Tools for High Performance Computing 2012, pp. 115\u2013126. Springer, Berlin (2013)"},{"key":"615_CR27","doi-asserted-by":"crossref","unstructured":"Malony, A.D., Shende, S., Spear, W., Lee, C.W., Biersdorff, S.: Advances in the TAU performance system. In: Tools for High Performance Computing 2011\u2014Proceedings of the 5th International Workshop on Parallel Tools for High Performance Computing, ZIH, Dresden, September 2011, pp. 119\u2013130. Springer, Berlin (2011)","DOI":"10.1007\/978-3-642-31476-6_10"},{"issue":"11","key":"615_CR28","doi-asserted-by":"crossref","first-page":"37","DOI":"10.1109\/2.471178","volume":"28","author":"BP Miller","year":"1995","unstructured":"Miller, B.P., Callaghan, M.D., Cargille, J.M., Hollingsworth, J.K., Irvin, R.B., Karavanic, K.L., Kunchithapadam, K., Newhall, T.: The paradyn parallel performance measurement tool. Computer 28(11), 37\u201346 (1995)","journal-title":"Computer"},{"key":"615_CR29","unstructured":"Mirgorodskiy, A.V.: Automated problem diagnosis in distributed systems. Ph.D. Thesis, Madison, WI, USA (2006). AAI3245668"},{"key":"615_CR30","doi-asserted-by":"crossref","unstructured":"Mirgorodskiy, A.V., Maruyama, N., Miller, B.P.: Scalable systems software\u2014problem diagnosis in large-scale computing environments. 
In: SC, p.\u00a088 (2006)","DOI":"10.1145\/1188455.1188548"},{"issue":"11","key":"615_CR31","doi-asserted-by":"crossref","first-page":"1517","DOI":"10.1002\/cpe.1126","volume":"19","author":"A Morajko","year":"2007","unstructured":"Morajko, A., Caymes-Scutari, P., Margalef, T., Luque, E.: MATE: monitoring, analysis and tuning environment for parallel\/distributed applications. Concurr. Comput. 19(11), 1517\u20131531 (2007)","journal-title":"Concurr. Comput."},{"key":"615_CR32","unstructured":"Morajko, O.: Online performance modeling and analysis of message-passing parallel applications. Ph.D. Thesis, Barcelona, Spain (2008)"},{"key":"615_CR33","doi-asserted-by":"crossref","unstructured":"Morajko, O., Morajko, A., Margalef, T., Luque, E.: On-line performance modeling for MPI applications. In: Euro-Par, pp. 68\u201377 (2008)","DOI":"10.1007\/978-3-540-85451-7_8"},{"key":"615_CR34","doi-asserted-by":"crossref","unstructured":"Noeth, M., Mueller, F., Schulz, M., de\u00a0Supinski, B.R.: Scalable compression and replay of communication traces in massively parallel environments. In: IPDPS, pp. 1\u201311 (2007)","DOI":"10.1109\/IPDPS.2007.370261"},{"issue":"8","key":"615_CR35","doi-asserted-by":"crossref","first-page":"696","DOI":"10.1016\/j.jpdc.2008.09.001","volume":"69","author":"M Noeth","year":"2009","unstructured":"Noeth, M., Ratn, P., Mueller, F., Schulz, M., de Supinski, B.R.: ScalaTrace: scalable compression and replay of communication traces for high-performance computing. J. Parallel Distrib. Comput. 69(8), 696\u2013710 (2009)","journal-title":"J. Parallel Distrib. Comput."},{"key":"615_CR36","unstructured":"Pillet, V., Labarta, J., Cortes, T., Girona, S.: PARAVER: a tool to visualize and analyze parallel code. Tech. 
Rep., In WoTUG-18, Departament d\u2019Arquitectura de Computadors, Universitat Polit\u00e8cnica de Catalunya (1995)"},{"key":"615_CR37","doi-asserted-by":"crossref","unstructured":"Ratn, P., Mueller, F., de Supinski, B.R., Schulz, M.: Preserving time in large-scale communication traces. In: Proceedings of the 22nd Annual International Conference on Supercomputing. ICS \u201908, pp. 46\u201355. ACM, New York (2008)","DOI":"10.1145\/1375527.1375537"},{"key":"615_CR38","doi-asserted-by":"crossref","unstructured":"Reussner, R., Sanders, P., Prechelt, L., M\u00fcller, M.: SKaMPI: a detailed, accurate MPI benchmark. In: Vassil Alexandrov and Jack Dongarra, editors, Recent Advances in Parallel Virtual Machine and Message Passing Interface, pp. 52\u201359. Springer, Berlin (1998)","DOI":"10.1007\/BFb0056559"},{"key":"615_CR39","doi-asserted-by":"crossref","unstructured":"Roth, P.C., Arnold, D.C., Miller, B.P.: MRNet: a software-based multicast\/reduction network for scalable tools. In: Proceedings of the 2003 ACM\/IEEE Conference on Supercomputing, SC \u201903, p. 21. ACM, New York (2003)","DOI":"10.1145\/1048935.1050172"},{"key":"615_CR40","doi-asserted-by":"crossref","unstructured":"Schulz, M.: Extracting critical path graphs from MPI applications. In: 2013 IEEE International Conference on Cluster Computing (CLUSTER), pp. 1\u201310 (2005)","DOI":"10.1109\/CLUSTR.2005.347035"},{"key":"615_CR41","doi-asserted-by":"crossref","unstructured":"Schulz, M., Bronevetsky, G., Supinski, B.R.: On the performance of transparent MPI piggyback messages. In: Proceedings of the 15th European PVM\/MPI Users\u2019 Group Meeting on Recent Advances in Parallel Virtual Machine and Message Passing Interface, pp. 194\u2013201. Springer, Berlin (2008)","DOI":"10.1007\/978-3-540-87475-1_28"},{"key":"615_CR42","unstructured":"Shende, S., Malony, A., Morris, A., Wolf, F.: Performance profiling overhead compensation for MPI programs. In: 12th European PVM\/MPI User\u2019s Group Meeting, LNCS, vol. 
3666, pp. 359\u2013367 (2005). Record converted from VDB: 12.11.2012"},{"key":"615_CR43","doi-asserted-by":"publisher","unstructured":"Sikora, A., Margalef, T., Jorba, J.: Online root-cause performance analysis of parallel applications. In: Parallel Computing, pp. 81\u2013107 (2015). doi: 10.1016\/j.parco.2015.05.003","DOI":"10.1016\/j.parco.2015.05.003"},{"key":"615_CR44","doi-asserted-by":"crossref","unstructured":"Spear, W., Malony, A.D., Lee, C.W., Biersdorff, S., Shende, S.: An approach to creating performance visualizations in a parallel profile analysis tool. In: Proceedings of the 2011 International Conference on Parallel Processing. Euro-Par\u201911, vol. 2, pp. 156\u2013165. Springer, Berlin (2012)","DOI":"10.1007\/978-3-642-29740-3_19"},{"key":"615_CR45","doi-asserted-by":"crossref","unstructured":"Tallent, N.R., Hoisie, A.: Palm: easing the burden of analytical performance modeling. In: 2014 International Conference on Supercomputing, ICS\u201914, Muenchen, Germany, June 10\u201313, 2014, pp. 221\u2013230 (2014)","DOI":"10.1145\/2597652.2597683"},{"key":"615_CR46","unstructured":"University of Wisconsin: MRNet: a multicast\/reduction network. http:\/\/www.paradyn.org\/mrnet\/ (2012). Accessed Apr 2016"},{"key":"615_CR47","unstructured":"University of Wisconsin, University of Maryland: DyninstAPI programmer\u2019s guide, paradyn parallel performance tools. http:\/\/www.dyninst.org\/manuals\/dyninstAPI (2013). Accessed Apr 2016"},{"issue":"4","key":"615_CR48","doi-asserted-by":"crossref","first-page":"352","DOI":"10.1109\/TSE.1984.5010248","volume":"10","author":"M Weiser","year":"1984","unstructured":"Weiser, M.: Program slicing. IEEE Trans. Softw. Eng. 10(4), 352\u2013357 (1984)","journal-title":"IEEE Trans. Softw. 
Eng."},{"issue":"2\u20133","key":"615_CR49","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1007\/s00450-012-0221-5","volume":"28","author":"MC Wiedemann","year":"2013","unstructured":"Wiedemann, M.C., Kunkel, J.M., Zimmer, M., Ludwig, T., Resch, M.M., B\u00f6nisch, T., Wang, X., Chut, A., Aguilera, A., Nagel, W.E., Kluge, M., Mickler, H.: Towards I\/O analysis of HPC systems and a generic architecture to collect access patterns. Comput. Sci. Res. Dev. 28(2\u20133), 241\u2013251 (2013)","journal-title":"Comput. Sci. Res. Dev."},{"issue":"10\u201311","key":"615_CR50","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1016\/S1383-7621(03)00102-4","volume":"49","author":"F Wolf","year":"2003","unstructured":"Wolf, F., Mohr, B.: Automatic performance analysis of hybrid MPI\/OpenMP applications. J. Syst. Archit. 49(10\u201311), 421\u2013439 (2003)","journal-title":"J. Syst. Archit."},{"key":"615_CR51","unstructured":"yWorks Company: yWorks, yEd\u2014java graph editor. http:\/\/www.yworks.com\/products\/yed\/ (2014). 
Accessed Apr 2016"}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-016-0615-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10586-016-0615-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-016-0615-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,18]],"date-time":"2024-06-18T23:41:13Z","timestamp":1718754073000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10586-016-0615-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,8,19]]},"references-count":51,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2016,9]]}},"alternative-id":["615"],"URL":"https:\/\/doi.org\/10.1007\/s10586-016-0615-4","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"type":"print","value":"1386-7857"},{"type":"electronic","value":"1573-7543"}],"subject":[],"published":{"date-parts":[[2016,8,19]]}}}