{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T17:53:24Z","timestamp":1773251604857,"version":"3.50.1"},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2016,12,27]],"date-time":"2016-12-27T00:00:00Z","timestamp":1482796800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2017,6]]},"DOI":"10.1007\/s10586-016-0707-1","type":"journal-article","created":{"date-parts":[[2016,12,26]],"date-time":"2016-12-26T20:11:57Z","timestamp":1482783117000},"page":"1179-1192","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Predicting HPC parallel program performance based on LLVM compiler"],"prefix":"10.1007","volume":"20","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4783-876X","authenticated-orcid":false,"given":"Weizhe","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Meng","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marc","family":"Snir","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,12,27]]},"reference":[{"key":"707_CR1","unstructured":"Top 500 supercomputer site. http:\/\/www.top500.org"},{"key":"707_CR2","doi-asserted-by":"crossref","unstructured":"Kerbyson, D.J., Alme, H.J., Hoisie, A., Petrini, F., Wasserman, H.J., Gittings, M.: Predictive performance and scalability modeling of a large-scale application. In: Proceedings of the 2001 ACM\/IEEE Conference on Supercomputing. ACM (2001)","DOI":"10.1145\/582034.582071"},{"key":"707_CR3","doi-asserted-by":"crossref","unstructured":"Sharapov, I., Kroeger, R., Delamarter, G., Cheveresan, R., Ramsay, M.: A case study in top-down performance estimation for a large-scale parallel application. In: Proceedings of the Eleventh ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 81\u201389. ACM (2006)","DOI":"10.1145\/1122971.1122985"},{"key":"707_CR4","doi-asserted-by":"crossref","unstructured":"Snavely, A., Carrington, L., Wolter, N., Labarta, J., Badia, R., Purkayastha, A.: A framework for performance modeling and prediction. In: Supercomputing, ACM\/IEEE 2002 Conference. IEEE (2002)","DOI":"10.1109\/SC.2002.10004"},{"key":"707_CR5","unstructured":"Zheng, G., Kakulapati, G., Kal\u00e9, L.V.: Bigsim: a parallel simulator for performance prediction of extremely large parallel machines. In: Proceedings of the 18th International Parallel and Distributed Processing Symposium. IEEE (2004)"},{"issue":"2","key":"707_CR6","doi-asserted-by":"crossref","first-page":"495","DOI":"10.1109\/TC.2015.2417526","volume":"65","author":"W Zhang","year":"2016","unstructured":"Zhang, W., Cheng, A.M., Subhlok, J.: DwarfCode: a performance prediction tool for parallel applications. IEEE Trans. Comput. 65(2), 495\u2013507 (2016)","journal-title":"IEEE Trans. Comput."},{"key":"707_CR7","unstructured":"Message passing interface forum. http:\/\/www.mpiforum.org\/"},{"key":"707_CR8","doi-asserted-by":"crossref","unstructured":"Lattner, C., Adve, V.: LLVM: a compilation framework for lifelong program analysis & transformation. In: International Symposium on Code Generation and Optimization, pp. 75\u201386. CGO IEEE (2004)","DOI":"10.1109\/CGO.2004.1281665"},{"key":"707_CR9","doi-asserted-by":"crossref","unstructured":"Alexandrov, A., Ionescu, M.F., Schauser, K.E., Scheiman, C.: LogGP: incorporating long messages into the LogP model\u2014one step closer towards a realistic model for parallel computation. In: Proceedings of the Seventh Annual ACM Symposium on Parallel Algorithms and Architectures, pp. 95\u2013105. ACM (1995)","DOI":"10.1145\/215399.215427"},{"key":"707_CR10","doi-asserted-by":"crossref","unstructured":"Hoefler, T., Mehlan, T., Lumsdaine, A., Rehm, W.: Netgauge: a network performance measurement framework. In: International Conference on High Performance Computing and Communications, pp. 659\u2013671. Springer, Berlin (2007)","DOI":"10.1007\/978-3-540-75444-2_62"},{"key":"707_CR11","unstructured":"SKaMPI project. http:\/\/liinwww.ira.uka.de\/~skampi\/"},{"key":"707_CR12","doi-asserted-by":"crossref","unstructured":"Wu, Y., Larus, J.R.: Static branch frequency and program profile analysis. In: Proceedings of the 27th Annual International Symposium on Microarchitecture, pp. 1\u201311. ACM (1994)","DOI":"10.1145\/192724.192725"},{"key":"707_CR13","first-page":"670","volume-title":"Compilers, Principles, Techniques","author":"AV Aho","year":"1986","unstructured":"Aho, A.V., Sethi, R., Ullman, J.D.: Compilers, Principles, Techniques, pp. 670\u2013671. Addison wesley, Boston (1986)"},{"key":"707_CR14","doi-asserted-by":"crossref","unstructured":"Hoefler, T., Lichei, A., Rehm, W.: Low-overhead LogGP parameter assessment for modern interconnection networks. In: 2007 IEEE International Parallel and Distributed Processing Symposium, pp. 1\u20138. IEEE (2007)","DOI":"10.1109\/IPDPS.2007.370593"},{"issue":"2","key":"707_CR15","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1007\/s10586-007-0012-0","volume":"10","author":"J Pje\u0161ivac-Grbovi\u0107","year":"2007","unstructured":"Pje\u0161ivac-Grbovi\u0107, J., Angskun, T., Bosilca, G., Fagg, G.E., Gabriel, E., Dongarra, J.J.: Performance analysis of MPI collective operations. Clust. Comput. 10(2), 127\u2013143 (2007)","journal-title":"Clust. Comput."},{"key":"707_CR16","unstructured":"OpenMPI project. https:\/\/www.open-mpi.org\/"},{"key":"707_CR17","doi-asserted-by":"crossref","unstructured":"Bailey, D.H., Barszcz, E., Barton, J.T., Browning, D.S., Carter, R.L., Dagum, L., Simon, H.D.: The NAS parallel benchmarks\u2014summary and preliminary results. In: Proceedings of the 1991 ACM\/IEEE Conference on Supercomputing, pp. 158\u2013165. ACM (1991)","DOI":"10.1145\/125826.125925"},{"key":"707_CR18","unstructured":"Stone, A., Dennis, J.M., & Strout, M.M.: The CGPOP miniapp, version 1.0. Colorado State University, Technical Report CS-11-103 (2011)"},{"key":"707_CR19","doi-asserted-by":"crossref","unstructured":"Velho, P., Legrand, A.: Accuracy study and improvement of network simulation in the simgrid framework. In: Proceedings of the second International Conference on Simulation Tools and Techniques. ICST (Institute for Computer Sciences, Social-Informatics and Telecommunications Engineering) (2009)","DOI":"10.4108\/ICST.SIMUTOOLS2009.5592"},{"issue":"1\u20132","key":"707_CR20","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1504\/IJBDI.2014.063837","volume":"1","author":"VE Malyshkin","year":"2014","unstructured":"Malyshkin, V.E.: Peculiarities of numerical algorithms parallel implementation for exa-flops multicomputers. Int. J. Big Data Intell. 1(1\u20132), 65\u201373 (2014)","journal-title":"Int. J. Big Data Intell."},{"issue":"3","key":"707_CR21","doi-asserted-by":"crossref","first-page":"176","DOI":"10.1504\/IJBDI.2016.078399","volume":"3","author":"V Viswanathan","year":"2016","unstructured":"Viswanathan, V.: Discovery of semantic associations in an RDF graph using bi-directional BFS on massively parallel hardware. Int. J. Big Data Intell. 3(3), 176\u2013181 (2016)","journal-title":"Int. J. Big Data Intell."},{"issue":"1","key":"707_CR22","doi-asserted-by":"crossref","first-page":"1","DOI":"10.4018\/ijghpc.2014010101","volume":"6","author":"CC Wu","year":"2014","unstructured":"Wu, C.C., Ke, J.Y., Lin, H., Jhan, S.S.: Adjusting thread parallelism dynamically to accelerate dynamic programming with irregular workload distribution on GPGPUs. Int. J. Grid High Perform. Comput. (IJGHPC) 6(1), 1\u201320 (2014)","journal-title":"Int. J. Grid High Perform. Comput. (IJGHPC)"},{"key":"707_CR23","doi-asserted-by":"crossref","unstructured":"Barker, K.J., Pakin, S., Kerbyson, D.J.: A performance model of the krak hydrodynamics application. In: 2006 International Conference on Parallel Processing (ICPP\u201906), pp. 245\u2013254. IEEE (2006)","DOI":"10.1109\/ICPP.2006.11"},{"issue":"4","key":"707_CR24","doi-asserted-by":"crossref","first-page":"330","DOI":"10.1177\/109434200001400405","volume":"14","author":"A Hoisie","year":"2000","unstructured":"Hoisie, A., Lubeck, O., Wasserman, H.: Performance and scalability analysis of teraflop-scale parallel architectures using multidimensional wavefront applications. Int. J. High Perform. Comput. Appl. 14(4), 330\u2013346 (2000)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"707_CR25","doi-asserted-by":"crossref","unstructured":"Calotoiu, A., Hoefler, T., Poke, M., Wolf, F.: Using automated performance modeling to find scalability bugs in complex codes. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis. ACM (2013)","DOI":"10.1145\/2503210.2503277"},{"key":"707_CR26","doi-asserted-by":"crossref","unstructured":"Cascaval, C., DeRose, L., Padua, D.A., Reed, D.A.: Compile-time based performance prediction. In: International Workshop on Languages and Compilers for Parallel Computing, pp. 365\u2013379. Springer, Berlin (1999)","DOI":"10.1007\/3-540-44905-1_23"},{"key":"707_CR27","doi-asserted-by":"crossref","unstructured":"Zhai, J., Chen, W., Zheng, W.: Phantom: predicting performance of parallel applications on large-scale parallel machines using a single node. In: ACM Sigplan Notices, vol. 45, no. 5, pp. 305\u2013314. ACM (2010)","DOI":"10.1145\/1693453.1693493"}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10586-016-0707-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-016-0707-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-016-0707-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,16]],"date-time":"2019-09-16T19:55:25Z","timestamp":1568663725000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10586-016-0707-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12,27]]},"references-count":27,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2017,6]]}},"alternative-id":["707"],"URL":"https:\/\/doi.org\/10.1007\/s10586-016-0707-1","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"value":"1386-7857","type":"print"},{"value":"1573-7543","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,12,27]]}}}