{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T05:54:08Z","timestamp":1726206848560},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2014,1,30]],"date-time":"2014-01-30T00:00:00Z","timestamp":1391040000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1007\/s11227-014-1105-1","type":"journal-article","created":{"date-parts":[[2014,1,29]],"date-time":"2014-01-29T10:27:28Z","timestamp":1390991248000},"page":"4646-4662","source":"Crossref","is-referenced-by-count":3,"title":["Performance-aware composition framework for GPU-based systems"],"prefix":"10.1007","volume":"71","author":[{"given":"Usman","family":"Dastgeer","sequence":"first","affiliation":[]},{"given":"Christoph","family":"Kessler","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,1,30]]},"reference":[{"key":"1105_CR1","unstructured":"Augonnet C et al (2009) Automatic calibration of performance models on heterogeneous multicore architectures. In: Euro-Par Workshops (HPPC 2009), LNCS, vol 6043"},{"key":"1105_CR2","doi-asserted-by":"crossref","unstructured":"Hong S, Kim H (2009) An analytical model for a GPU architecture with memory-level and thread-level parallelism awareness. In: Proceedings of the Annual International Symposium on Computer Architecture (ISCA)","DOI":"10.1145\/1555754.1555775"},{"key":"1105_CR3","doi-asserted-by":"crossref","unstructured":"Karcher T, Pankratius V (2011) Run-time automatic performance tuning for multicore applications. In: Euro-Par, LNCS vol 6852","DOI":"10.1007\/978-3-642-23400-2_2"},{"key":"1105_CR4","doi-asserted-by":"crossref","unstructured":"Ansel J et al (2009) PetaBricks: a language and compiler for algorithmic choice. In: Proceedings conference on Programming Language Design and Implementation (PLDI)","DOI":"10.1145\/1542476.1542481"},{"key":"1105_CR5","doi-asserted-by":"crossref","unstructured":"Linderman MD et al (2008) Merge: a programming model for heterogeneous multi-core systems. In: Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS)","DOI":"10.1145\/1346281.1346318"},{"key":"1105_CR6","doi-asserted-by":"crossref","unstructured":"Wernsing JR, Stitt G (2010) Elastic computing: a framework for transparent, portable, and adaptive multi-core heterogeneous computing. In: Proceedings of the Conference on Languages, Compilers, and Tools for Embedded Systems (LCTES)","DOI":"10.1145\/1755888.1755906"},{"key":"1105_CR7","doi-asserted-by":"crossref","unstructured":"Gregg C, Hazelwood K (2011) Where is the data? Why you cannot debate CPU vs. GPU performance without the answer. In: International Symposium on Performance Analysis of Systems and Software (ISPASS)","DOI":"10.1109\/ISPASS.2011.5762730"},{"key":"1105_CR8","unstructured":"Quinlan D, Liao C (2011) The ROSE source-to-source compiler infrastructure. In: Cetus users and compiler infrastructure workshop, USA"},{"key":"1105_CR9","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1002\/cpe.1631","volume":"23","author":"C Augonnet","year":"2011","unstructured":"Augonnet C et al (2011) StarPU: a unified platform for task scheduling on heterogeneous multicore architectures. Concurr Comput Pract Exper 23:187\u2013198","journal-title":"Concurr Comput Pract Exper"},{"key":"1105_CR10","doi-asserted-by":"crossref","unstructured":"Feautrier P (1991) Dataflow analysis of array and scalar references. Intl J Parallel Program 20(1)","DOI":"10.1007\/BF01407931"},{"key":"1105_CR11","doi-asserted-by":"crossref","unstructured":"Kicherer M et al (2011) Cost-aware function migration in heterogeneous systems. In: Proceedings of the Conference on High Performance and Embedded Architectures and Compilers (HiPEAC)","DOI":"10.1145\/1944862.1944883"},{"key":"1105_CR12","doi-asserted-by":"crossref","unstructured":"Li L, Dastgeer U, Kessler C (2013) Adaptive off-line tuning for optimized composition of components for heterogeneous many-core systems. In: Seventh Int. Workshop on Automatic Performance Tuning (iWAPT-2012), Proc. VECPAR-2012 Conference","DOI":"10.1007\/978-3-642-38718-0_32"},{"key":"1105_CR13","doi-asserted-by":"crossref","unstructured":"Che S et al (2009) Rodinia: a benchmark suite for heterogeneous computing. In: IEEE International Symposium on Workload Characterization (IISWC)","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"1105_CR14","doi-asserted-by":"crossref","unstructured":"Topcuoglu H et al (2002) Performance-effective and low-complexity task scheduling for heterogeneous computing. IEEE Trans Par Dist Syst 13(3)","DOI":"10.1109\/71.993206"},{"key":"1105_CR15","doi-asserted-by":"crossref","unstructured":"Korch M, Rauber, T (2006) Optimizing locality and scalability of embedded Runge-Kutta solvers using block-based pipelining. J Parallel Distrib Comput 66(3)","DOI":"10.1016\/j.jpdc.2005.09.003"},{"key":"1105_CR16","doi-asserted-by":"crossref","unstructured":"Kicherer M, Nowak F, Buchty R, Karl W (2012) Seamlessly portable applications:Managing the diversity of modern heterogeneous systems. ACM Trans Archit Code Optim 8(4):42(1\u201342:20)","DOI":"10.1145\/2086696.2086721"},{"key":"1105_CR17","doi-asserted-by":"crossref","unstructured":"Dastgeer U, Li L, Kessler C (2012) The PEPPHER composition tool: Performance-aware dynamic composition of applications for GPU-based systems. In: MuCoCoS, SC12","DOI":"10.1109\/SC.Companion.2012.97"},{"key":"1105_CR18","doi-asserted-by":"crossref","unstructured":"Lee S, Vetter JS (2012) Early evaluation of directive-based gpu programming models for productive exascale computing. In: Conference for high performance computing, networking, storage and analysis","DOI":"10.1109\/SC.2012.51"},{"key":"1105_CR19","doi-asserted-by":"crossref","first-page":"349","DOI":"10.1007\/s11227-011-0591-7","volume":"58","author":"R Reyes","year":"2011","unstructured":"Reyes R, Sande F (2011) Automatic code generation for GPUs in llc. J Supercomput 58:349\u2013356","journal-title":"J Supercomput"},{"issue":"5","key":"1105_CR20","doi-asserted-by":"crossref","first-page":"481","DOI":"10.1002\/cpe.1844","volume":"24","author":"CW Kessler","year":"2012","unstructured":"Kessler CW, L\u00f6we W (2012) Optimized composition of performance-aware parallel components. Concurr Comput Pract Exper 24(5):481\u2013498","journal-title":"Concurr Comput Pract Exper"},{"key":"1105_CR21","unstructured":"Ericsson M (2008) Composition and optimization. V\u00e4xj\u00f6 University Press, Kalmar"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1105-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-014-1105-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-014-1105-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,7]],"date-time":"2019-08-07T03:40:50Z","timestamp":1565149250000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-014-1105-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,1,30]]},"references-count":21,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2015,12]]}},"alternative-id":["1105"],"URL":"https:\/\/doi.org\/10.1007\/s11227-014-1105-1","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014,1,30]]}}}