{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T07:01:40Z","timestamp":1772521300249,"version":"3.50.1"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319172477","type":"print"},{"value":"9783319172484","type":"electronic"}],"license":[{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2015,1,1]],"date-time":"2015-01-01T00:00:00Z","timestamp":1420070400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015]]},"DOI":"10.1007\/978-3-319-17248-4_8","type":"book-chapter","created":{"date-parts":[[2015,4,17]],"date-time":"2015-04-17T15:11:09Z","timestamp":1429283469000},"page":"149-171","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Modeling Stencil Computations on Modern HPC Architectures"],"prefix":"10.1007","author":[{"given":"Ra\u00fal","family":"de la Cruz","sequence":"first","affiliation":[]},{"given":"Mauricio","family":"Araya-Polo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,4,18]]},"reference":[{"key":"8_CR1","first-page":"185","volume":"17","author":"M Araya-Polo","year":"2008","unstructured":"Araya-Polo, M., Rubio, F., Hanzich, M., de la Cruz, R., Cela, J.M., Scarpazza, D.P.: 3D seismic imaging through reverse-time migration on homogeneous and heterogeneous multi-core processors. Sci. Program. Spec. Issue Cell Processor 17, 185\u2013198 (2008)","journal-title":"Sci. Program. Spec. Issue Cell Processor"},{"key":"8_CR2","volume-title":"Computational Aspects of Astrophysical MHD and Turbulence,","author":"A Brandenburg","year":"2003","unstructured":"Brandenburg, A.: Computational Aspects of Astrophysical MHD and Turbulence, vol. 9. Taylor and Francis, London (2003)"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Christen, M., Schenk, O., Burkhart, H.: PATUS: A code generation and autotuning framework for parallel iterative stencil computations on modern microarchitectures. In: Proceedings of the 2011 IEEE International Parallel and Distributed Processing Symposium, IPDPS 2011, pp. 676\u2013687. IEEE Computer Society, Washington, DC (2011)","DOI":"10.1109\/IPDPS.2011.70"},{"issue":"1","key":"8_CR4","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1137\/070693199","volume":"51","author":"K Datta","year":"2009","unstructured":"Datta, K., Kamil, S., Williams, S., Oliker, L., Shalf, J., Yelick, K.: Optimization and performance modeling of stencil computations on modern microprocessors. SIAM Rev. 51(1), 129\u2013159 (2009)","journal-title":"SIAM Rev."},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"de la Cruz, R., Araya-Polo, M.: Towards a multi-level cache performance model for 3D stencil computation. In: Proceedings of the International Conference on Computational Science, ICCS 2011. Procedia Computer Science, Singapore, vol. 4, pp. 2146\u20132155. Elsevier (2011)","DOI":"10.1016\/j.procs.2011.04.235"},{"issue":"3","key":"8_CR6","doi-asserted-by":"crossref","first-page":"23:1","DOI":"10.1145\/2591006","volume":"40","author":"R de la Cruz","year":"2014","unstructured":"de la Cruz, R., Araya-Polo, M.: Algorithm 942: semi-stencil. ACM Trans. Math. Softw. 40(3), 23:1\u201323:39 (2014)","journal-title":"ACM Trans. Math. Softw."},{"key":"8_CR7","unstructured":"Fang, J., Varbanescu, A.L., Sips, H.J., Zhang, L., Che, Y., Xu, C.: An empirical study of intel xeon phi. CoRR, abs\/1310.5842 (2013)"},{"key":"8_CR8","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1142\/S0218396X08003683","volume":"16","author":"C De Groot-Hedlin","year":"2008","unstructured":"De Groot-Hedlin, C.: A finite difference solution to the Helmholtz equation in a radially symmetric waveguide: application to near-source scattering in ocean acoustics. J. Comput. Acoust. 16, 447\u2013464 (2008)","journal-title":"J. Comput. Acoust."},{"key":"8_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1007\/BFb0100608","volume-title":"High-Performance Computing and Networking","author":"JS Harper","year":"1999","unstructured":"Harper, J.S., Kerbyson, D.J., Nudd, G.R.: Efficient analytical modelling of multi-level set-associative caches. In: Sloot, P.M.A., Hoekstra, A.G., Bubak, M., Hertzberger, B. (eds.) HPCN-Europe 1999. LNCS, vol. 1593, pp. 473\u2013482. Springer, Heidelberg (1999)"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Kamil, S., Chan, C., Oliker, L., Shalf, J., Williams, S.: An auto-tuning framework for parallel multicore stencil computations. In: Proceedings of the International Parallel and Distributed Processing Symposium (IPDPS), pp. 1\u201312, April 2010","DOI":"10.1109\/IPDPS.2010.5470421"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"Kamil, S., Datta, K., Williams, S., Oliker, L., Shalf, J., Yelick, K.: Implicit and explicit optimizations for stencil computations. In: MSPC 2006: Proceedings of the 2006 workshop on Memory System Performance and Correctness, pp. 51\u201360. ACM, New York (2006)","DOI":"10.1145\/1178597.1178605"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Kamil, S., Husbands, P., Oliker, L., Shalf, J., Yelick, K.: Impact of modern memory subsystems on cache optimizations for stencil computations. In: MSP 2005: Proceedings of the 2005 workshop on Memory System Performance, pp. 36\u201343. ACM Press, New York (2005)","DOI":"10.1145\/1111583.1111589"},{"issue":"1\u20132","key":"8_CR13","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1016\/j.jsv.2008.03.024","volume":"317","author":"J Kormann","year":"2008","unstructured":"Kormann, J., Cobo, P., Prieto, A.: Perfectly matched layers for modelling seismic oceanography experiments. J. Sound Vib. 317(1\u20132), 354\u2013365 (2008)","journal-title":"J. Sound Vib."},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Marin, G., McCurdy, C., Vetter, J.S.: Diagnosis and optimization of application prefetching performance. In: Proceedings of the 27th International ACM Conference on International Conference on Supercomputing, ICS 2013, pp. 303\u2013312. ACM, New York (2013)","DOI":"10.1145\/2464996.2465014"},{"key":"8_CR15","unstructured":"McCalpin, J.D.: Stream: Sustainable memory bandwidth in high performance computers. Technical report, University of Virginia, Charlottesville, Virginia, 1991\u20132007. A continually updated technical report. http:\/\/www.cs.virginia.edu\/stream\/"},{"key":"8_CR16","doi-asserted-by":"crossref","unstructured":"McCurdy, C., Marin, G., Vetter, J.S.: Characterizing the impact of prefetching on scientific application performance. In: International Workshop on Performance Modeling, Benchmarking and Simulation of HPC Systems (PMBS13), Denver, CO (2013)","DOI":"10.1007\/978-3-319-10214-6_6"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Mehta, S., Fang, Z., Zhai, A., Yew, P.-C.: Multi-stage coordinated prefetching for present-day processors. In: Proceedings of the 28th ACM International Conference on Supercomputing, ICS 2014, pp. 73\u201382. ACM, New York (2014)","DOI":"10.1145\/2597652.2597660"},{"key":"8_CR18","unstructured":"Nishtala, R., Vuduc, R.W., Demmel, J.W., Yelick, K.A.: Performance modeling and analysis of cache blocking in sparse matrix vector multiply. Technical report UCB\/CSD-04-1335, EECS Department, University of California, Berkeley (2004)"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Faizur Rahman, S.M., Yi, Q., Qasem, A.: Understanding stencil code performance on multicore architectures. In: Proceedings of the 8th ACM International Conference on Computing Frontiers, CF 2011, pp. 30:1\u201330:10. ACM, New York (2011)","DOI":"10.1145\/2016604.2016641"},{"key":"8_CR20","doi-asserted-by":"publisher","first-page":"1192","DOI":"10.1109\/TAP.2006.869894","volume":"54","author":"A Ray","year":"2006","unstructured":"Ray, A., Kondayya, G., Menon, S.V.G.: Developing a finite difference time domain parallel code for nuclear electromagnetic field simulation. IEEE Trans. Antennas Propag. 54, 1192\u20131199 (2006)","journal-title":"IEEE Trans. Antennas Propag."},{"key":"8_CR21","doi-asserted-by":"crossref","unstructured":"Rivera, G., Tseng, C.W.: Tiling optimizations for 3D scientific computations. In: Proceedings of the ACM\/IEEE Supercomputing Conference (SC 2000), p. 32. IEEE Computer Society, Washington, DC, November 2000","DOI":"10.1109\/SC.2000.10015"},{"key":"8_CR22","unstructured":"Strzodka, R., Shaheen, M., Pajak, D.: Impact of system and cache bandwidth on stencil computation across multiple processor generations. In: Proceedings of the Workshop on Applications for Multi- and Many-Core Processors (A4MMC) at ISCA 2011, June 2011"},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Temam, O., Fricker, C., Jalby, W.: Cache interference phenomena. In: Proceedings of the 1994 ACM SIGMETRICS Conference on Measurement and Modeling of Computer Systems, SIGMETRICS 1994, pp. 261\u2013271. ACM, New York (1994)","DOI":"10.1145\/183018.183047"},{"key":"8_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1007\/978-3-642-14390-8_64","volume-title":"Parallel Processing and Applied Mathematics","author":"J Treibig","year":"2010","unstructured":"Treibig, J., Hager, G.: Introducing a performance model for bandwidth-limited loop kernels. In: Wyrzykowski, R., Dongarra, J., Karczewski, K., Wasniewski, J. (eds.) PPAM 2009, Part I. LNCS, vol. 6067, pp. 615\u2013624. Springer, Heidelberg (2010)"},{"key":"8_CR25","doi-asserted-by":"crossref","unstructured":"Williams, S.W., Waterman, A., Patterson, D.A.: Roofline: An insightful visual performance model for floating-point programs and multicore architectures. Technical report UCB\/EECS-2008-134, EECS Department, University of California, Berkeley, October 2008","DOI":"10.2172\/1407078"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing Systems. Performance Modeling, Benchmarking, and Simulation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-17248-4_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T14:46:37Z","timestamp":1675262797000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-17248-4_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015]]},"ISBN":["9783319172477","9783319172484"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-17248-4_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015]]},"assertion":[{"value":"18 April 2015","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}