{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T00:40:03Z","timestamp":1747183203886,"version":"3.40.5"},"publisher-location":"Cham","reference-count":16,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319143248"},{"type":"electronic","value":"9783319143255"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-14325-5_16","type":"book-chapter","created":{"date-parts":[[2014,12,10]],"date-time":"2014-12-10T08:55:04Z","timestamp":1418201704000},"page":"177-188","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Towards the Transparent Execution of Compound OpenCL Computations in Multi-CPU\/Multi-GPU Environments"],"prefix":"10.1007","author":[{"given":"F\u00e1bio","family":"Soldado","sequence":"first","affiliation":[]},{"given":"Fernando","family":"Alexandre","sequence":"additional","affiliation":[]},{"given":"Herv\u00e9","family":"Paulino","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"16_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"170","DOI":"10.1007\/978-3-642-45293-2_13","volume-title":"Advanced Parallel Processing Technologies","author":"U. Dastgeer","year":"2013","unstructured":"Dastgeer, U., Li, L., Kessler, C.: Adaptive implementation selection in the skePU skeleton programming library. In: Wu, C., Cohen, A. (eds.) APPT 2013. LNCS, vol.\u00a08299, pp. 170\u2013183. Springer, Heidelberg (2013)"},{"key":"16_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1007\/978-3-642-39958-9_24","volume-title":"Parallel Computing Technologies","author":"M. Steuwer","year":"2013","unstructured":"Steuwer, M., Gorlatch, S.: SkelCL: Enhancing openCL for high-level programming of multi-GPU systems. In: Malyshkin, V. (ed.) PaCT 2013. LNCS, vol.\u00a07979, pp. 258\u2013272. Springer, Heidelberg (2013)"},{"key":"16_CR3","unstructured":"AMD Corporation: Bolt C++ Template Library, http:\/\/developer.amd.com\/tools\/heterogeneous-computing\/"},{"key":"16_CR4","unstructured":"Hoberock, J., Bell, N.: Thrust: A parallel template library, http:\/\/thrust.github.io\/"},{"issue":"2","key":"16_CR5","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1504\/IJHPCN.2012.046370","volume":"7","author":"S. Ernsting","year":"2012","unstructured":"Ernsting, S., Kuchen, H.: Algorithmic skeletons for multi-core, multi-GPU systems and clusters. Int. J. High Perform. Comput. Netw.\u00a07(2), 129\u2013138 (2012)","journal-title":"Int. J. High Perform. Comput. Netw."},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Huynh, H.P., et al.: Scalable framework for mapping streaming applications onto multi-GPU systems. In: PPoPP 2012, pp. 1\u201310. ACM (2012)","DOI":"10.1145\/2370036.2145818"},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Dubach, C., others: Compiling a high-level language for GPUs (via language support for architectures and compilers). In: PLDI 2012, pp. 1\u201312. ACM (2012)","DOI":"10.1145\/2345156.2254066"},{"key":"16_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"874","DOI":"10.1007\/978-3-642-40047-6_86","volume-title":"Euro-Par 2013 Parallel Processing","author":"R. Marques","year":"2013","unstructured":"Marques, R., Paulino, H., Alexandre, F., Medeiros, P.D.: Algorithmic Skeleton Framework for the Orchestration of GPU Computations. In: Wolf, F., Mohr, B., an Mey, D. (eds.) Euro-Par 2013. LNCS, vol.\u00a08097, pp. 874\u2013885. Springer, Heidelberg (2013)"},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"Alexandre, F., Marques, R., Paulino, H.: On the support of task-parallel algorithmic skeletons for multi-GPU computing. In: SAC 2014, pp. 880\u2013885. ACM (2014)","DOI":"10.1145\/2554850.2555018"},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"Rossbach, C.J., Yu, Y., Currey, J., Martin, J.-P., Fetterly, D.: Dandelion: a compiler and runtime for heterogeneous systems. In: SOSP 2013, pp. 49\u201368. ACM (2013)","DOI":"10.1145\/2517349.2522715"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Dollinger, J.F., Loechner, V.: Adaptive runtime selection for GPU. In: ICPP 2013, pp. 70\u201379. IEEE Computer Society Press (2013)","DOI":"10.1109\/ICPP.2013.16"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Chen, L., Huo, X., Agrawal, G.: Accelerating MapReduce on a coupled CPU-GPU architecture. In: SC 2012, pp. 25:1\u201325:11. IEEE Computer Society Press (2012)","DOI":"10.1109\/SC.2012.16"},{"key":"16_CR13","doi-asserted-by":"crossref","unstructured":"Lee, J., et al.: Transparent CPU-GPU collaboration for data-parallel kernels on heterogeneous systems. In: PaCT 2013, pp. 245\u2013255. IEEE (2013)","DOI":"10.1109\/PACT.2013.6618814"},{"key":"16_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"693","DOI":"10.1007\/978-3-642-55224-3_65","volume-title":"Parallel Processing and Applied Mathematics","author":"J. Cola\u00e7o","year":"2014","unstructured":"Cola\u00e7o, J., Matoga, A., Ilic, A., Roma, N., Tom\u00e1s, P., Chaves, R.: Transparent application acceleration by intelligent scheduling of shared library calls on heterogeneous systems. In: Wyrzykowski, R., Dongarra, J., Karczewski, K., Wa\u015bniewski, J. (eds.) PPAM 2013, Part I. LNCS, vol.\u00a08384, pp. 693\u2013703. Springer, Heidelberg (2014)"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Soldado, F., Alexandre, F., Paulino, H.: Transparent execution of compound OpenCL computations in multi-CPU\/multi-GPU environments. Technical report, CITI\/DI, Universidade NOVA de Lisboa (2014)","DOI":"10.1007\/978-3-319-14325-5_16"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Dathathri, R., et al.: Generating efficient data movement code for heterogeneous architectures with distributed-memory. In: PaCT 2013, pp. 375\u2013386. IEEE (2013)","DOI":"10.1109\/PACT.2013.6618826"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2014: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-14325-5_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T00:19:05Z","timestamp":1747181945000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-14325-5_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319143248","9783319143255"],"references-count":16,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-14325-5_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}