{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T16:30:39Z","timestamp":1742920239177,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319099668"},{"type":"electronic","value":"9783319099675"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-09967-5_18","type":"book-chapter","created":{"date-parts":[[2014,9,30]],"date-time":"2014-09-30T15:10:04Z","timestamp":1412089804000},"page":"307-321","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Compiler Optimizations for Non-contiguous Remote Data Movement"],"prefix":"10.1007","author":[{"given":"Timo","family":"Schneider","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robert","family":"Gerstenberger","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Torsten","family":"Hoefler","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,10,1]]},"reference":[{"key":"18_CR1","doi-asserted-by":"crossref","unstructured":"Alexandrov, A., Ionescu, M.F., Schauser, K.E., Scheiman, C.: LogGP: incorporating long messages into the logP model - one step closer towards a realistic model for parallel computation. In: Proceedings of the 7th Annual ACM Symposium on Parallel Algorithms and Architectures (SPAA\u201995), pp. 95\u2013105 (1995)","DOI":"10.1145\/215399.215427"},{"issue":"11","key":"18_CR2","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1145\/1168918.1168906","volume":"41","author":"S Bansal","year":"2006","unstructured":"Bansal, S., Aiken, A.: Automatic generation of peephole superoptimizers. ACM SIGPLAN Not. 41(11), 394\u2013403 (2006)","journal-title":"ACM SIGPLAN Not."},{"key":"18_CR3","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1177\/109434209100500406","volume":"54","author":"C Bernard","year":"1991","unstructured":"Bernard, C., Ogilvie, M., et al.: Studying quarks and gluons on MIMD parallel computers. Int. J. High Perform. Comput. Appl. 54, 61\u201370 (1991)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Bronevetsky, G.: Communication-sensitive static dataflow for parallel message passing applications. In: Proceedings of the 7th Annual IEEE\/ACM International Symposium on Code Generation and Optimization (CGO\u201909) (2009)","DOI":"10.1109\/CGO.2009.32"},{"key":"18_CR5","unstructured":"ten Bruggencate, M., Roweth, D.: DMAPP - an API for one-sided program models on Baker systems. In: Cray User Group Conference (CUG\u201910) (2010)"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Carrington, L., Komatitsch, D., et al.: High-frequency simulations of global seismic wave propagation using SPECFEM3D_GLOBE on 62\u00a0K processors. In: Proceedings of the 22nd International Conference on Supercomputing (SC\u201908) (2008)","DOI":"10.1109\/SC.2008.5215501"},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Cousot, P., Cousot, R.: Abstract interpretation: a unified lattice model for static analysis of programs by construction or approximation of fixpoints. In: Proceedings of the 4th ACM SIGACT-SIGPLAN Symposium on Principles of Programming Languages (POPL\u201977), pp. 238\u2013252 (1977)","DOI":"10.1145\/512950.512973"},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Denis, A.: A high performance superpipeline protocol for InfiniBand. In: Proceedings of the European Conference on Parallel Processing, pp. 276\u2013287 (2011)","DOI":"10.1007\/978-3-642-23397-5_27"},{"issue":"1","key":"18_CR9","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1006\/jpdc.1994.1040","volume":"21","author":"S Hiranandani","year":"1994","unstructured":"Hiranandani, S., Kennedy, K., Tseng, C.W.: Evaluating compiler optimizations for Fortran D. J. Parallel Distrib. Comput. 21(1), 27\u201345 (1994)","journal-title":"J. Parallel Distrib. Comput."},{"issue":"6","key":"18_CR10","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1145\/1993316.1993516","volume":"46","author":"TB Jablin","year":"2011","unstructured":"Jablin, T.B., Prabhu, P., Jablin, J.A., Johnson, N.P., Beard, S.R., August, D.I.: Automatic CPU-GPU communication management and optimization. ACM SIGPLAN Not. 46(6), 142\u2013151 (2011)","journal-title":"ACM SIGPLAN Not."},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"Jenkins, J., Dinan, J., et al.: Enabling fast, noncontiguous GPU data movement in hybrid MPI\u00a0+\u00a0GPU environments. In: Proceedings of the IEEE International Conference on Cluster Computing (CLUSTER\u201912) (2012)","DOI":"10.1109\/CLUSTER.2012.72"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Kjolstad, F., Hoefler, T., Snir, M.: Automatic datatype generation and optimization. In: Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming (PPoPP\u201912), pp. 327\u2013328 (2012)","DOI":"10.1145\/2145816.2145878"},{"key":"18_CR13","unstructured":"MPI Forum: MPI: A Message-Passing Interface Standard. Version 3"},{"issue":"2","key":"18_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/289918.289920","volume":"17","author":"RW Numrich","year":"1998","unstructured":"Numrich, R.W., Reid, J.: Co-array Fortran for parallel programming. SIGPLAN Fortran Forum 17(2), 1\u201331 (1998)","journal-title":"SIGPLAN Fortran Forum"},{"key":"18_CR15","first-page":"617","volume-title":"High Performance Mass Storage and Parallel I\/O","author":"GF Pfister","year":"2001","unstructured":"Pfister, G.F.: An introduction to the infiniband architecture. In: Hai, J., Toni, C., Buyya, R. (eds.) High Performance Mass Storage and Parallel I\/O, pp. 617\u2013632. Wiley, New York (2001)"},{"key":"18_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1007\/978-3-540-30218-6_14","volume-title":"Recent Advances in Parallel Virtual Machine and Message Passing Interface","author":"G Santhanaraman","year":"2004","unstructured":"Santhanaraman, G., Wu, J., Panda, D.K.: Zero-copy MPI derived datatype communication over infiniband. In: Kranzlm\u00fcller, D., Kacsuk, P., Dongarra, J. (eds.) EuroPVM\/MPI 2004. LNCS, vol. 3241, pp. 47\u201356. Springer, Heidelberg (2004)"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Schkufza, E., Sharma, R., Aiken, A.: Stochastic superoptimization. In: Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS\u201913), pp. 305\u2013316 (2013)","DOI":"10.1145\/2451116.2451150"},{"key":"18_CR18","first-page":"279","volume":"964","author":"T Schneider","year":"2013","unstructured":"Schneider, T., Gerstenberger, R., Hoefler, T.: Application-oriented ping-pong benchmarking: how to assess the real communication overheads. J. Comput. 964, 279\u2013292 (2013)","journal-title":"J. Comput."},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Schneider, T., Kjolstad, F., Hoefler, T.: MPI datatype processing using runtime compilation. In: Proceedings of EuroMPI\u201913, September 2013","DOI":"10.1145\/2488551.2488552"},{"issue":"7","key":"18_CR20","doi-asserted-by":"publisher","first-page":"3465","DOI":"10.1016\/j.jcp.2007.01.037","volume":"227","author":"WC Skamarock","year":"2008","unstructured":"Skamarock, W.C., Klemp, J.B.: A time-split nonhydrostatic atmospheric model for weather research and forecasting applications. J. Comput. Phys. 227(7), 3465\u20133485 (2008)","journal-title":"J. Comput. Phys."},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"UPC Consortium: UPC language specifications. Version 1. 2 (2005)","DOI":"10.2172\/862127"},{"key":"18_CR22","unstructured":"der Wijngaart, R.F.V., Wong, P.: NAS parallel benchmarks version 2.4. Technical report, NAS Technical Report NAS-02-007 (2002)"},{"key":"18_CR23","unstructured":"Woodacre, M., Robb, D., Roe, D., Feind, K.: The SGI AltixTM 3000 global shared memory architecture (2005)"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-09967-5_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T11:01:43Z","timestamp":1676890903000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-09967-5_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319099668","9783319099675"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-09967-5_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"1 October 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}