{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,28]],"date-time":"2025-05-28T04:18:28Z","timestamp":1748405908251,"version":"3.41.0"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319174723"},{"type":"electronic","value":"9783319174730"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-17473-0_24","type":"book-chapter","created":{"date-parts":[[2015,4,30]],"date-time":"2015-04-30T09:59:39Z","timestamp":1430387979000},"page":"367-381","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Unification of Static and Dynamic Analyses to Enable Vectorization"],"prefix":"10.1007","author":[{"given":"Ashay","family":"Rane","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rakesh","family":"Krishnaiyer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chris J.","family":"Newburn","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"James","family":"Browne","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Leonardo","family":"Fialho","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zakhar","family":"Matveev","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,5,1]]},"reference":[{"key":"24_CR1","doi-asserted-by":"crossref","unstructured":"Bailey, D.H., Barszcz, E., Barton, J.T., Browning, D.S., Carter, R.L., Dagum, L., Fatoohi, R.A., Frederickson, P.O., Lasinski, T.A., Schreiber, R.S., Simon, H.D., Venkatakrishnan, V., Weeratunga, S.K.: The NAS parallel benchmarks - summary and preliminary results. In: Proceedings of the 1991 ACM\/IEEE Conference on Supercomputing, Supercomputing 1991, pp. 158\u2013165. ACM, New York (1991)","DOI":"10.1145\/125826.125925"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Brett, B., Kumar, P., Kim, M., Kim, H.: CHiP: a profiler to measure the effect of cache contention on scalability. In: Proceedings of the 2013 IEEE 27th International Symposium on Parallel and Distributed Processing Workshops, IPDPSW 2013, pp. 1565\u20131574. IEEE Computer Society, Washington, DC (2013)","DOI":"10.1109\/IPDPSW.2013.49"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Callahan, D., Dongarra, J., Levine, D.: Vectorizing compilers: a test suite and results. In: Proceedings of the 1988 ACM\/IEEE Conference on Supercomputing, Supercomputing 1988, pp. 98\u2013105. IEEE Computer Society Press, Los Alamitos (1988)","DOI":"10.1109\/SUPERC.1988.44642"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Che, S., Boyer, M., Meng, J., Tarjan, D., Sheaffer, J., Lee, S.H., Skadron, K.: Rodinia: a benchmark suite for heterogeneous computing. In: IEEE International Symposium on Workload Characterization, IISWC 2009, pp. 44\u201354, October 2009","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Chung, I.H., Cong, G., Klepacki, D., Sbaraglia, S., Seelam, S., Wen, H.F.: A framework for automated performance bottleneck detection. In: IEEE International Symposium on Parallel and Distributed Processing, IPDPS 2008, pp. 1\u20137, April 2008","DOI":"10.1109\/IPDPS.2008.4536182"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Evans, G.C., Abraham, S., Kuhn, B., Padua, D.A.: Vector seeker: a tool for finding vector potential. In: Proceedings of the 2014 Workshop on Programming Models for SIMD\/Vector Processing, WPMVP 2014, pp. 41\u201348. ACM, New York (2014)","DOI":"10.1145\/2568058.2568069"},{"key":"24_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/978-3-319-07518-1_17","volume-title":"Supercomputing","author":"L Fialho","year":"2014","unstructured":"Fialho, L., Browne, J.: Framework and modular infrastructure for automation of architectural adaptation and performance optimization for HPC systems. In: Kunkel, J.M., Ludwig, T., Meuer, H.W. (eds.) ISC 2014. LNCS, vol. 8488, pp. 261\u201377. Springer, Heidelberg (2014)"},{"issue":"6","key":"24_CR8","doi-asserted-by":"crossref","first-page":"371","DOI":"10.1145\/2345156.2254108","volume":"47","author":"J Holewinski","year":"2012","unstructured":"Holewinski, J., Ramamurthi, R., Ravishankar, M., Fauzia, N., Pouchet, L.N., Rountev, A., Sadayappan, P.: Dynamic trace-based analysis of vectorization potential of applications. SIGPLAN Not. 47(6), 371\u201382 (2012)","journal-title":"SIGPLAN Not."},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"Hornung, R., Keasler, J.: A case for improved C++ compiler support to enable performance portability in large physics simulation codes. Technical report, Lawrence Livermore National Laboratory (LLNL), Livermore, CA (2013)","DOI":"10.2172\/1078540"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Karlin, I., Bhatele, A., Keasler, J., Chamberlain, B.L., Cohen, J., Devito, Z., Haque, R., Laney, D., Luke, E., Wang, F., Richards, D., Schulz, M., Still, C.H.: Exploring traditional and emerging parallel programming models using a proxy application. In: Parallel and Distributed Processing Symposium, International, pp. 919\u2013932 (2013)","DOI":"10.1109\/IPDPS.2013.115"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Karlin, I., Keasler, J., Neely, R.: Lulesh 2.0 updates and changes. Technical report LLNL-TR-641973, Lawrence Livermore National Laboratory (2013)","DOI":"10.2172\/1090032"},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Krishnaiyer, R., Kultursay, E., Chawla, P., Preis, S., Zvezdin, A., Saito, H.: Compiler-based data prefetching and streaming non-temporal store generation for the intel(r) xeon phi(tm) coprocessor. In: 2013 IEEE 27th International Parallel and Distributed Processing Symposium Workshops Ph.D. Forum (IPDPSW), pp. 1575\u20131586, May 2013","DOI":"10.1109\/IPDPSW.2013.231"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"Kristof, P., Yu, H., Li, Z., Tian, X.: Performance study of simd programming models on intel multicore processors. In: 2012 IEEE 26th International Parallel and Distributed Processing Symposium Workshops Ph.D. Forum (IPDPSW), pp. 2423\u20132432, May 2012","DOI":"10.1109\/IPDPSW.2012.299"},{"issue":"7","key":"24_CR14","doi-asserted-by":"publisher","first-page":"812","DOI":"10.1109\/71.238302","volume":"4","author":"J Larus","year":"1993","unstructured":"Larus, J.: Loop-level parallelism in numeric and symbolic programs. IEEE Trans. Parallel Distrib. Syst. 4(7), 812\u201326 (1993)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Maleki, S., Gao, Y., Garzaran, M., Wong, T., Padua, D.: An evaluation of vectorizing compilers. In: 2011 International Conference on Parallel Architectures and Compilation Techniques (PACT), pp. 372\u2013382, October 2011","DOI":"10.1109\/PACT.2011.68"},{"key":"24_CR16","unstructured":"McCalpin, J.D.: A survey of memory bandwidth and machine balance in current high performance computers. IEEE TCCA Newsl. 19\u201325 (1995)"},{"key":"24_CR17","doi-asserted-by":"crossref","unstructured":"Oancea, C.E., Rauchwerger, L.: Logical inference techniques for loop parallelization. In: Proceedings of the 33rd ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2012, pp. 509\u2013520. ACM, New York (2012)","DOI":"10.1145\/2254064.2254124"},{"issue":"2\/3","key":"24_CR18","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1142\/S0129626400000214","volume":"10","author":"DJ Quinlan","year":"2000","unstructured":"Quinlan, D.J.: ROSE: compiler support for object-oriented frameworks. Parallel Process. Lett. 10(2\/3), 215\u201326 (2000)","journal-title":"Parallel Process. Lett."},{"issue":"1","key":"24_CR19","doi-asserted-by":"publisher","first-page":"3:1","DOI":"10.1145\/2588788","volume":"1","author":"A Rane","year":"2014","unstructured":"Rane, A., Browne, J.: Enhancing performance optimization of multicore\/multichip nodes with data structure metrics. ACM Trans. Parallel Comput. 1(1), 3:1\u20133:20 (2014)","journal-title":"ACM Trans. Parallel Comput."},{"issue":"9","key":"24_CR20","doi-asserted-by":"publisher","first-page":"1068","DOI":"10.1002\/nme.2930","volume":"84","author":"C Rosales","year":"2010","unstructured":"Rosales, C., Whyte, D.S.: Dual grid lattice boltzmann method for multiphase flows. Int. J. Numer. Meth. Eng. 84(9), 1068\u201384 (2010)","journal-title":"Int. J. Numer. Meth. Eng."},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"Satish, N., Kim, C., Chhugani, J., Saito, H., Krishnaiyer, R., Smelyanskiy, M., Girkar, M., Dubey, P.: Can traditional programming bridge the Ninja performance gap for parallel computing applications? In: Proceedings of the 39th Annual International Symposium on Computer Architecture, ISCA 2012, pp. 440\u2013451. IEEE Computer Society, Washington, DC (2012)","DOI":"10.1109\/ISCA.2012.6237038"},{"issue":"5","key":"24_CR22","doi-asserted-by":"publisher","first-page":"488","DOI":"10.1007\/s10766-009-0102-0","volume":"37","author":"G Shi","year":"2009","unstructured":"Shi, G., Kindratenko, V., Gottlieb, S.: The bottom-up implementation of one MILC lattice QCD application on the cell blade. Int. J. Parallel Program. 37(5), 488\u2013507 (2009)","journal-title":"Int. J. Parallel Program."},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Zhong, H., Mehrara, M., Lieberman, S., Mahlke, S.: Uncovering hidden loop level parallelism in sequential applications. In: IEEE 14th International Symposium on High Performance Computer Architecture, HPCA 2008, pp. 290\u2013301, February 2008","DOI":"10.1109\/HPCA.2008.4658647"},{"key":"24_CR24","doi-asserted-by":"crossref","unstructured":"Zhuang, X., Eichenberger, A., Luo, Y., O\u2019Brien, K., O\u2019Brien, K.: Exploiting parallelism with dependence-aware scheduling. In: 18th International Conference on Parallel Architectures and Compilation Techniques, PACT 2009, pp. 193\u2013202, September 2009","DOI":"10.1109\/PACT.2009.10"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-17473-0_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T18:35:28Z","timestamp":1748370928000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-17473-0_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319174723","9783319174730"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-17473-0_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"1 May 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}