{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T02:38:10Z","timestamp":1725849490760},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319297774"},{"type":"electronic","value":"9783319297781"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-29778-1_9","type":"book-chapter","created":{"date-parts":[[2016,2,19]],"date-time":"2016-02-19T04:16:33Z","timestamp":1455855393000},"page":"140-155","source":"Crossref","is-referenced-by-count":3,"title":["HYDRA : Extending Shared Address Programming for Accelerator Clusters"],"prefix":"10.1007","author":[{"given":"Putt","family":"Sakdhnagool","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Amit","family":"Sabne","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rudolf","family":"Eigenmann","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,2,20]]},"reference":[{"key":"9_CR1","first-page":"1","volume":"41","author":"H Bae","year":"2012","unstructured":"Bae, H., Mustafa, D., Lee, J.W., Aurangzeb, B., Lin, H., Dave, C., Eigenmann, R., Midkiff, S.: The Cetus source-to-source compiler infrastructure: Overview and evaluation. Int. J. Parallel Program. 41, 1\u201315 (2012)","journal-title":"Int. J. Parallel Program."},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Bueno, J., Planas, J., Duran, A., Badia, R., Martorell, X., Ayguade, E., Labarta, J.: Productive programming of GPU clusters with OmpSs. In: IEEE 26th International Parallel Distributed Processing Symposium, IPDPS 2012, pp. 557\u2013568, May 2012","DOI":"10.1109\/IPDPS.2012.58"},{"key":"9_CR3","doi-asserted-by":"crossref","unstructured":"Bueno, J., Martorell, X., Badia, R.M., Ayguad\u00e9, E., Labarta, J.: Implementing OmpSs support for regions of data in architectures with multiple address spaces. In: Proceedings of the 27th International ACM Conference on International Conference on Supercomputing, pp. 359\u2013368. ACM, NY, USA, New York (2013)","DOI":"10.1145\/2464996.2465017"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Che, S., Boyer, M., Meng, J., Tarjan, D., Sheaffer, J.W., Lee, S.H., Skadron, K.: Rodinia: A benchmark suite for heterogeneous computing. In: Proceedings of the 2009 IEEE International Symposium on Workload Characterization, IISWC 2009, pp. 44\u201354. IEEE Computer Society, Washington, DC (2009)","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"UPC Consortium: UPC language specifications, v1.2. Technical report LBNL-59208, Lawrence Berkeley National Lab (2005)","DOI":"10.2172\/862127"},{"key":"9_CR6","unstructured":"Corporation, I.: Intel\n                      \n                        \n                      \n                      $$\\textregistered $$\n                     SDK for OpenCL applications XE R3. (2013). \n                      https:\/\/software.intel.com\/sites\/products\/documentation\/ioclsdk\/2013XE\/UG\/index.htm"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Dwarkadas, S., Cox, A.L., Zwaenepoel, W.: An integrated compile-time\/run-time software distributed shared memory system. In: Proceedings of the Seventh International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS VII, pp. 186\u2013197. ACM, NY, USA, New York (1996)","DOI":"10.1145\/248208.237181"},{"key":"9_CR8","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1109\/TPDS.2010.62","volume":"22","author":"TD Han","year":"2011","unstructured":"Han, T.D., Abdelrahman, T.S.: hiCUDA: High-level GPGPU programming. IEEE Trans. Parallel Distrib. Syst. 22, 78\u201390 (2011)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Kim, J., Seo, S., Lee, J., Nah, J., Jo, G., Lee, J.: SnuCL: An OpenCL framework for heterogeneous CPU\/GPU clusters. In: Proceedings of the 26th ACM International Conference on Supercomputing, ICS 2012, pp. 341\u2013352. ACM, NY, USA, New York (2012)","DOI":"10.1145\/2304576.2304623"},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Kwon, O., Jubair, F., Eigenmann, R., Midkiff, S.: A hybrid approach of OpenMP for clusters. In: Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 75\u201384 (2012)","DOI":"10.1145\/2145816.2145827"},{"key":"9_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-36036-7_1","volume-title":"Languages and Compilers for Parallel Computing","author":"O Kwon","year":"2013","unstructured":"Kwon, O., Jubair, F., Min, S.-J., Bae, H., Eigenmann, R., Midkiff, S.P.: Automatic scaling of OpenMP beyond shared memory. In: Rajopadhye, S., Mills Strout, M. (eds.) LCPC 2011. LNCS, vol. 7146, pp. 1\u201315. Springer, Heidelberg (2013)"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Landaverde, R., Zhang, T., Coskun, A.K., Herbordt, M.: An investigation of unified memory access performance in cuda. In: Proceedings of the IEEE High Performance Extreme Computing Conference (2014)","DOI":"10.1109\/HPEC.2014.7040988"},{"key":"9_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1007\/978-3-642-29737-3_48","volume-title":"Euro-Par 2011: Parallel Processing Workshops","author":"J Lee","year":"2012","unstructured":"Lee, J., Tran, M.T., Odajima, T., Boku, T., Sato, M.: An extension of XcalableMP PGAS lanaguage for multi-node GPU clusters. In: Alexander, M., et al. (eds.) Euro-Par 2011, Part I. LNCS, vol. 7155, pp. 429\u2013439. Springer, Heidelberg (2012)"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Lee, S., Eigenmann, R.: OpenMPC: Extended OpenMP programming and tuning for GPUs. In: Proceedings of the 2010 ACM\/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201311 (2010)","DOI":"10.1109\/SC.2010.36"},{"issue":"2","key":"9_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/289918.289920","volume":"17","author":"RW Numrich","year":"1998","unstructured":"Numrich, R.W., Reid, J.: Co-array fortran for parallel programming. SIGPLAN Fortran Forum 17(2), 1\u201331 (1998)","journal-title":"SIGPLAN Fortran Forum"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Potluri, S., Bureddy, D., Wang, H., Subramoni, H., Panda, D.: Extending OpenSHMEM for GPU computing. In: 2013 IEEE 27th International Symposium on Parallel Distributed Processing (IPDPS), pp. 1001\u20131012, May 2013","DOI":"10.1109\/IPDPS.2013.104"},{"issue":"4","key":"9_CR17","doi-asserted-by":"publisher","first-page":"60: 1","DOI":"10.1145\/2544100","volume":"10","author":"T Ramashekar","year":"2013","unstructured":"Ramashekar, T., Bondhugula, U.: Automatic data allocation and buffer management for multi-GPU machines. ACM Trans. Archit. Code Optim. 10(4), 60: 1\u201360: 26 (2013)","journal-title":"ACM Trans. Archit. Code Optim."},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Forum, High Performance Fortran: High performance fortran language specification. SIGPLAN Fortran Forum, vol. 12 (4), 1\u201386, December 1993","DOI":"10.1145\/174223.158909"},{"key":"9_CR19","doi-asserted-by":"crossref","unstructured":"Thies, W., Karczmarek, M., Gordon, M.I., Maze, D.Z., Wong, J., Hoffman, H., Brown, M., Amarasinghe, S.: Streamit: A compiler for streaming applications. Technical report MIT\/LCS Technical Memo LCS-TM-622, Massachusetts Institute of Technology, Cambridge, MA, December 2001","DOI":"10.1007\/3-540-45937-5_14"},{"issue":"5","key":"9_CR20","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1109\/MCSE.2011.83","volume":"13","author":"J Vetter","year":"2011","unstructured":"Vetter, J., Glassbrook, R., Dongarra, J., Schwan, K., Loftis, B., McNally, S., Meredith, J., Rogers, J., Roth, P., Spafford, K., Yalamanchili, S.: Keeneland: Bringing heterogeneous GPU computing to the computational science community. Comput. Sci. Eng. 13(5), 90\u201395 (2011)","journal-title":"Comput. Sci. Eng."},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Yelick, K., Semenzato, L., Pike, G., Miyamoto, C., Liblit, B., Krishnamurthy, A., Hilfinger, P., Graham, S., Gay, D., Colella, P., Aiken, A.: Titanium: A high-performance Java dialect. In: ACM, pp. 10\u201311 (1998)","DOI":"10.1002\/(SICI)1096-9128(199809\/11)10:11\/13<825::AID-CPE383>3.0.CO;2-H"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-29778-1_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T14:41:44Z","timestamp":1559400104000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-29778-1_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319297774","9783319297781"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-29778-1_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]}}}