{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T08:10:18Z","timestamp":1773130218259,"version":"3.50.1"},"reference-count":19,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2018,3,17]],"date-time":"2018-03-17T00:00:00Z","timestamp":1521244800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2019,3]]},"DOI":"10.1007\/s11227-018-2318-5","type":"journal-article","created":{"date-parts":[[2018,3,17]],"date-time":"2018-03-17T03:16:55Z","timestamp":1521256615000},"page":"1123-1136","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Load balancing in a heterogeneous world: CPU-Xeon Phi co-execution of data-parallel kernels"],"prefix":"10.1007","volume":"75","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4927-9829","authenticated-orcid":false,"given":"Ra\u00fal","family":"Nozal","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Borja","family":"Perez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jose Luis","family":"Bosque","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ram\u00f3n","family":"Beivide","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,3,17]]},"reference":[{"key":"2318_CR1","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.parco.2016.05.006","volume":"58","author":"AM Aji","year":"2016","unstructured":"Aji AM et al (2016) MultiCL: enabling automatic scheduling for task-parallel workloads in OpenCL. Parallel Comput 58:37\u201355","journal-title":"Parallel Comput"},{"key":"2318_CR2","unstructured":"AMD Accelerated Parallel Processing (APP) Software Development Kit (SDK) V3. Last accessed January 2018. https:\/\/developer.amd.com\/amd-accelerated-parallel-processing-app-sdk\/"},{"issue":"4","key":"2318_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2400682.2400716","volume":"9","author":"ME Belviranli","year":"2013","unstructured":"Belviranli ME, Bhuyan LN, Gupta R (2013) A dynamic self-scheduling scheme for heterogeneous multiprocessor architectures. ACM Trans Archit Code Optim 9(4):1\u201320","journal-title":"ACM Trans Archit Code Optim"},{"issue":"2","key":"2318_CR4","doi-asserted-by":"publisher","first-page":"729","DOI":"10.1007\/s11227-014-1316-5","volume":"71","author":"E Castillo","year":"2014","unstructured":"Castillo E et al (2014) Financial applications on multi-CPU and multi-GPU architectures. J Supercomput 71(2):729\u2013739","journal-title":"J Supercomput"},{"key":"2318_CR5","doi-asserted-by":"crossref","unstructured":"Donyanavard B, M\u00fcck T, Sarma S, Dutt N (2016) SPARTA: runtime task allocation for energy efficient heterogeneous many-cores bryan. In: Proceedings of the 11th IEEE\/ACM\/IFIP International Conference on Hardware\/Software Codesign and System Synthesis, pp 1\u201310","DOI":"10.1145\/2968456.2968459"},{"issue":"3","key":"2318_CR6","doi-asserted-by":"publisher","first-page":"787","DOI":"10.1109\/TPDS.2016.2599527","volume":"28","author":"A Lastovetsky","year":"2017","unstructured":"Lastovetsky A, Szustak L, Wyrzykowski R (2017) Model-based optimization of eulag kernel on intel xeon phi through load imbalancing. IEEE Trans Parallel Distrib Syst 28(3):787\u2013797","journal-title":"IEEE Trans Parallel Distrib Syst"},{"issue":"3","key":"2318_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2798725","volume":"33","author":"J Lee","year":"2015","unstructured":"Lee J, Samadi M, Park Y, Mahlke S (2015) Skmd. ACM Trans Comput Syst 33(3):1\u201327","journal-title":"ACM Trans Comput Syst"},{"key":"2318_CR8","doi-asserted-by":"crossref","unstructured":"Li P, Brunet E, Trahay F, Parrot C, Thomas G, Namyst R (2015) Automatic OpenCL code generation for multi-device heterogeneous architectures. In: Proceedings of the International Conference on Parallel Processing, pp 959\u2013968","DOI":"10.1109\/ICPP.2015.105"},{"key":"2318_CR9","doi-asserted-by":"crossref","unstructured":"Lopez et al (2016) Towards achieving performance portability using directives for accelerators. In: Third workshop on accelerator programming using directives, pp 13\u201324","DOI":"10.1109\/WACCPD.2016.006"},{"key":"2318_CR10","doi-asserted-by":"crossref","unstructured":"Ma K, Li X, Chen W, Zhang C, Wang X (2012) GreenGPU: a holistic approach to energy efficiency in GPU-CPU heterogeneous architectures. In: Proceedings of the International Conference on Parallel Processing, pp 48\u201357","DOI":"10.1109\/ICPP.2012.31"},{"key":"2318_CR11","doi-asserted-by":"crossref","unstructured":"Pandit P, Govindarajan R (2014) Fluidic kernels: cooperative execution of opencl programs on multiple heterogeneous devices. In: Proceedings of Annual IEEE\/ACM International Symposium on Code Generation and Optimization, pp 273\u2013283","DOI":"10.1145\/2581122.2544163"},{"key":"2318_CR12","doi-asserted-by":"crossref","unstructured":"P\u00e9rez B, Bosque JL, Beivide R (2016) Simplifying programming and load balancing of data parallel applications on heterogeneous systems. In: Proceedings of the 9th Annual Workshop on General Purpose Processing using Graphics Processing Unit, ACM, pp 42\u201351","DOI":"10.1145\/2884045.2884051"},{"key":"2318_CR13","doi-asserted-by":"crossref","unstructured":"Salehian S, Liu J, Yan Y (2017) Comparison of threading programming models. In: Proceedings IEEE 31st International Parallel and Distributed Processing Sym. Workshops, pp 766\u2013774","DOI":"10.1109\/IPDPSW.2017.141"},{"issue":"3","key":"2318_CR14","first-page":"66","volume":"12","author":"JE Stone","year":"2010","unstructured":"Stone JE, Gohara D, Shi G (2010) OpenCL: a parallel programming standard for heterogeneous computing systems. IEEE Des Test 12(3):66\u201373","journal-title":"IEEE Des Test"},{"issue":"1","key":"2318_CR15","doi-asserted-by":"publisher","first-page":"140","DOI":"10.1016\/j.procs.2015.05.213","volume":"51","author":"A Vilches","year":"2015","unstructured":"Vilches A, Asenjo R, Navarro A, Corbera F, Gran R, Garzar\u00e1n M (2015) Adaptive partitioning for irregular applications on heterogeneous CPU\u2013GPU chips. Procedia Comput Sci 51(1):140\u2013149","journal-title":"Procedia Comput Sci"},{"key":"2318_CR16","doi-asserted-by":"crossref","unstructured":"Wienke S, Terboven C, An Mey D, Muller MS (2013) Accelerators, quo vadis? Performance vs. productivity. In: Proceedings of the International Conference on High Performance Computing and Simulation, pp 471\u2013473","DOI":"10.1109\/HPCSim.2013.6641455"},{"key":"2318_CR17","doi-asserted-by":"crossref","unstructured":"Xiao X, Hirasawa S, Takizawa H, Kobayashi H (2016) The importance of dynamic load balancing among openmp thread teams for irregular workloads. In: 4th International Symposium on Computing and Networking, pp 529\u2013535","DOI":"10.1109\/CANDAR.2016.0097"},{"issue":"3","key":"2318_CR18","doi-asserted-by":"publisher","first-page":"905","DOI":"10.1109\/TPDS.2016.2586074","volume":"28","author":"F Zhang","year":"2017","unstructured":"Zhang F, Zhai J, He B, Zhang S, Chen W (2017) Understanding co-running behaviors on integrated cpu\/gpu architectures. IEEE Trans Parallel Distrib Syst 28(3):905\u2013918","journal-title":"IEEE Trans Parallel Distrib Syst"},{"issue":"9","key":"2318_CR19","doi-asserted-by":"publisher","first-page":"2506","DOI":"10.1109\/TC.2014.2375202","volume":"64","author":"Z Zhong","year":"2015","unstructured":"Zhong Z, Rychkov V, Lastovetsky A (2015) Data partitioning on multicore and multi-GPU platforms using functional performance models. IEEE Trans Comput 64(9):2506\u20132518","journal-title":"IEEE Trans Comput"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-018-2318-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-018-2318-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-018-2318-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,12]],"date-time":"2019-10-12T21:45:55Z","timestamp":1570916755000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-018-2318-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,3,17]]},"references-count":19,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2019,3]]}},"alternative-id":["2318"],"URL":"https:\/\/doi.org\/10.1007\/s11227-018-2318-5","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,3,17]]},"assertion":[{"value":"17 March 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}