{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T13:59:23Z","timestamp":1774533563801,"version":"3.50.1"},"reference-count":105,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2018,4,26]],"date-time":"2018-04-26T00:00:00Z","timestamp":1524700800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/100003077","name":"The Knowledge Foundation","doi-asserted-by":"crossref","award":["20150088"],"award-info":[{"award-number":["20150088"]}],"id":[{"id":"10.13039\/100003077","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Computing"],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1007\/s00607-018-0614-9","type":"journal-article","created":{"date-parts":[[2018,4,26]],"date-time":"2018-04-26T15:13:01Z","timestamp":1524755581000},"page":"893-936","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":44,"title":["Using meta-heuristics and machine learning for software optimization of parallel computing systems: a systematic literature 
review"],"prefix":"10.1007","volume":"101","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1608-3181","authenticated-orcid":false,"given":"Suejb","family":"Memeti","sequence":"first","affiliation":[]},{"given":"Sabri","family":"Pllana","sequence":"additional","affiliation":[]},{"given":"Al\u00e9cio","family":"Binotto","sequence":"additional","affiliation":[]},{"given":"Joanna","family":"Ko\u0142odziej","sequence":"additional","affiliation":[]},{"given":"Ivona","family":"Brandic","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,4,26]]},"reference":[{"key":"614_CR1","doi-asserted-by":"crossref","unstructured":"Agakov F, Bonilla E, Cavazos J, Franke B, Fursin G, O\u2019Boyle MF, Thomson J, Toussaint M, Williams CK (2006) Using machine learning to focus iterative optimization. In: Proceedings of the international symposium on code generation and optimization, IEEE Computer Society, pp 295\u2013305","DOI":"10.1109\/CGO.2006.37"},{"key":"614_CR2","first-page":"231","volume-title":"Scheduling parallel programs using genetic algorithms. Solutions to parallel and distributed computing problems","author":"I Ahmad","year":"2001","unstructured":"Ahmad I, Kwok Y, Ahmad I, Dhodhi M (2001) Scheduling parallel programs using genetic algorithms. Solutions to parallel and distributed computing problems. Wiley, New York, pp 231\u2013254"},{"key":"614_CR3","volume-title":"Compilers: principles, techniques, and tools","author":"AV Aho","year":"2006","unstructured":"Aho AV, Lam MS, Sethi R, Ullman JD (2006) Compilers: principles, techniques, and tools, 2nd edn. Addison-Wesley Longman Publishing Co., Inc, Boston","edition":"2"},{"issue":"12","key":"614_CR4","doi-asserted-by":"publisher","first-page":"867","DOI":"10.1016\/j.parco.2013.08.011","volume":"39","author":"OE Albayrak","year":"2013","unstructured":"Albayrak OE, Akturk I, Ozturk O (2013) Improving application behavior on heterogeneous manycore systems through kernel mapping. 
Parallel Comput 39(12):867\u2013878","journal-title":"Parallel Comput"},{"key":"614_CR5","volume-title":"PetaBricks: a language and compiler for algorithmic choice","author":"J Ansel","year":"2009","unstructured":"Ansel J, Chan C, Wong YL, Olszewski M, Zhao Q, Edelman A, Amarasinghe S (2009) PetaBricks: a language and compiler for algorithmic choice. ACM, New York"},{"key":"614_CR6","first-page":"10","volume-title":"Introduction to parallel computing","author":"B Barney","year":"2010","unstructured":"Barney B et al (2010) Introduction to parallel computing. Lawrence Livermore National Laboratory, Livermore, p 10"},{"key":"614_CR7","unstructured":"Beach TH, Avis NJ (2009) An intelligent semi-automatic application porting system for application accelerators. In: Proceedings of the combined workshops on UnConventional high performance computing workshop plus memory access workshop, ACM, pp 7\u201310"},{"issue":"1","key":"614_CR8","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1109\/TVCG.2015.2467757","volume":"22","author":"F Beck","year":"2016","unstructured":"Beck F, Koch S, Weiskopf D (2016) Visual analysis and dissemination of scientific literature collections with survis. IEEE Trans Visual Comput Graphics 22(1):180\u2013189. https:\/\/doi.org\/10.1109\/TVCG.2015.2467757","journal-title":"IEEE Trans Visual Comput Graphics"},{"key":"614_CR9","unstructured":"Benkner S, Pllana S, Tr\u00e4ff JL, Tsigas P, Richards A, Namyst R, Bachmayer B, Kessler C, Moloney D, Sanders P (2011) The PEPPHER approach to programmability and performance portability for heterogeneous many-core architectures. In: ParCo"},{"issue":"2","key":"614_CR10","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1177\/004912418101000205","volume":"10","author":"P Biernacki","year":"1981","unstructured":"Biernacki P, Waldorf D (1981) Snowball sampling: problems and techniques of chain referral sampling. 
Sociol Methods Res 10(2):141\u2013163","journal-title":"Sociol Methods Res"},{"issue":"2","key":"614_CR11","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1016\/j.conengprac.2012.10.001","volume":"21","author":"APD Binotto","year":"2013","unstructured":"Binotto APD, Wehrmeister MA, Kuijper A, Pereira CE (2013) Sm@rtConfig: a context-aware runtime and tuning system using an aspect-oriented approach for data intensive engineering applications. Control Eng Pract 21(2):204\u2013217","journal-title":"Control Eng Pract"},{"issue":"6","key":"614_CR12","doi-asserted-by":"publisher","first-page":"810","DOI":"10.1006\/jpdc.2000.1714","volume":"61","author":"TD Braun","year":"2001","unstructured":"Braun TD, Siegel HJ, Beck N, B\u00f6l\u00f6ni LL, Maheswaran M, Reuther AI, Robertson JP, Theys MD, Yao B, Hensgen D et al (2001) A comparison of eleven static heuristics for mapping a class of independent tasks onto heterogeneous distributed computing systems. J Parallel Distrib Comput 61(6):810\u2013837","journal-title":"J Parallel Distrib Comput"},{"issue":"6","key":"614_CR13","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1016\/j.future.2008.12.001","volume":"25","author":"R Buyya","year":"2009","unstructured":"Buyya R, Yeo CS, Venugopal S, Broberg J, Brandic I (2009) Cloud computing and emerging it platforms: vision, hype, and reality for delivering computing as the 5th utility. Future Gen Comp Syst 25(6):599\u2013616. https:\/\/doi.org\/10.1016\/j.future.2008.12.001","journal-title":"Future Gen Comp Syst"},{"issue":"6","key":"614_CR14","first-page":"1","volume":"3","author":"J Carretero","year":"2007","unstructured":"Carretero J, Xhafa F, Abraham A (2007) Genetic algorithm based schedulers for grid computing systems. 
Int J Innov Comput Inf Control 3(6):1\u201319","journal-title":"Int J Innov Comput Inf Control"},{"key":"614_CR15","doi-asserted-by":"crossref","unstructured":"Castro M, Goes LFW, Ribeiro CP, Cole M, Cintra M, Mehaut JF (2011) A machine learning-based approach for thread mapping on transactional memory applications. In: 2011 18th International conference on high performance computing (HiPC), IEEE, pp 1\u201310","DOI":"10.1109\/HiPC.2011.6152736"},{"key":"614_CR16","doi-asserted-by":"crossref","unstructured":"Castro M, G\u00f3es LFW, Fernandes LG, M\u00e9haut JF (2012) Dynamic thread mapping based on machine learning for transactional memory applications. In: Euro-Par 2012 Parallel Processing, Springer, pp 465\u2013476","DOI":"10.1007\/978-3-642-32820-6_47"},{"key":"614_CR17","doi-asserted-by":"crossref","unstructured":"Cavazos J, Moss JEB (2004) Inducing heuristics to decide whether to schedule. In: Conference on programming language design and implementation, ACM, New York, NY, USA, PLDI \u201904, pp 183\u2013194","DOI":"10.1145\/996841.996864"},{"key":"614_CR18","doi-asserted-by":"crossref","unstructured":"Cavazos J, Fursin G, Agakov F, Bonilla E, Boyle MF, Temam O (2007) Rapidly selecting good compiler optimizations using performance counters. In: International symposium on code generation and optimization, 2007. CGO\u201907. IEEE, pp 185\u2013197","DOI":"10.1109\/CGO.2007.32"},{"key":"614_CR19","doi-asserted-by":"crossref","unstructured":"Chen X, Long S (2009) Adaptive multi-versioning for OpenMP parallelization via machine learning. In: 15th International conference on parallel and distributed systems (ICPADS), 2009, IEEE, pp 907\u2013912","DOI":"10.1109\/ICPADS.2009.77"},{"key":"614_CR20","doi-asserted-by":"publisher","unstructured":"Chirkin AM, Belloum AS, Kovalchuk SV, Makkes MX, Melnik MA, Visheratin AA, Nasonov DA (2017) Execution time estimation for workflow scheduling. Future generation computer systems. 
https:\/\/doi.org\/10.1016\/j.future.2017.01.011","DOI":"10.1016\/j.future.2017.01.011"},{"key":"614_CR21","doi-asserted-by":"crossref","unstructured":"Cooper KD, Grosul A, Harvey TJ, Reeves S, Subramanian D, Torczon L, Waterman T (2005) ACME: adaptive compilation made efficient. In: ACM SIGPLAN notices, ACM 40:69\u201377","DOI":"10.1145\/1065910.1065921"},{"issue":"7","key":"614_CR22","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1109\/TPDS.2005.85","volume":"16","author":"J Corbalan","year":"2005","unstructured":"Corbalan J, Martorell X, Labarta J (2005) Performance-driven processor allocation. IEEE Trans Parallel Distrib Syst 16(7):599\u2013611","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"614_CR23","doi-asserted-by":"crossref","unstructured":"Danylenko A, Kessler C, L\u00f6we W (2011) Comparing machine learning approaches for context-aware composition. In: Software composition, Springer, Berlin pp 18\u201333","DOI":"10.1007\/978-3-642-22045-6_2"},{"key":"614_CR24","doi-asserted-by":"publisher","unstructured":"Diamos GF, Yalamanchili S (2008) Harmony: An execution model and runtime for heterogeneous many core systems. In: Proceedings of the 17th international symposium on high performance distributed Computing, ACM, New York, NY, USA, HPDC \u201908, pp 197\u2013200. https:\/\/doi.org\/10.1145\/1383422.1383447","DOI":"10.1145\/1383422.1383447"},{"issue":"13","key":"614_CR25","first-page":"1","volume":"13","author":"K Diefendorff","year":"1999","unstructured":"Diefendorff K (1999) Power4 focuses on memory bandwidth. Microprocess. Rep. 13(13):1\u20138","journal-title":"Microprocess. Rep."},{"issue":"2","key":"614_CR26","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1109\/MCSE.2005.34","volume":"7","author":"J Dongarra","year":"2005","unstructured":"Dongarra J, Sterling T, Simon H, Strohmaier E (2005) High-performance computing: clusters, constellations, mpps, and future directions. Comput. Sci. Eng. 
7(2):51\u201359","journal-title":"Comput. Sci. Eng."},{"key":"614_CR27","volume-title":"Pattern classification and scene analysis","author":"RO Duda","year":"1973","unstructured":"Duda RO, Hart PE et al (1973) Pattern classification and scene analysis, vol 3. Wiley, New York"},{"key":"614_CR28","doi-asserted-by":"crossref","unstructured":"Eastep J, Wingate D, Santambrogio MD, Agarwal A (2010) Smartlocks: lock acquisition scheduling for self-aware synchronization. In: Proceedings of the 7th international conference on autonomic computing, ACM, pp 215\u2013224","DOI":"10.1145\/1809049.1809079"},{"key":"614_CR29","doi-asserted-by":"crossref","unstructured":"Eastep J, Wingate D, Agarwal A (2011) Smart data structures: an online machine learning approach to multicore data structures. In: Proceedings of the 8th international conference on Autonomic computing, ACM, pp 11\u201320","DOI":"10.1145\/1998582.1998587"},{"key":"614_CR30","doi-asserted-by":"crossref","unstructured":"Emani MK, Wang Z, O\u2019Boyle MF (2013) Smart, adaptive mapping of parallelism in the presence of external workload. In: International symposium on code generation and optimization (CGO), IEEE, pp 1\u201310","DOI":"10.1109\/CGO.2013.6495010"},{"key":"614_CR31","doi-asserted-by":"crossref","unstructured":"Fonseca A, Cabral B (2013) \u00c6miniumGPU: An Intelligent Framework for GPU Programming. In: Facing the multicore-challenge III, Springer, pp 96\u2013107","DOI":"10.1007\/978-3-642-35893-7_9"},{"key":"614_CR32","volume-title":"The Grid 2: blueprint for a new computing infrastructure","author":"I Foster","year":"2003","unstructured":"Foster I, Kesselman C (2003) The Grid 2: blueprint for a new computing infrastructure. Elsevier, Amsterdam"},{"key":"614_CR33","doi-asserted-by":"publisher","unstructured":"Foster I, Zhao Y, Raicu I, Lu S (2008) Cloud computing and grid computing 360-degree compared. In: 2008 Grid computing environments workshop, pp 1\u201310. 
https:\/\/doi.org\/10.1109\/GCE.2008.4738445","DOI":"10.1109\/GCE.2008.4738445"},{"key":"614_CR34","unstructured":"Fursin G, Miranda C, Temam O, Namolaru M, Yom-Tov E, Zaks A, Mendelson B, Bonilla E, Thomson J, Leather H, et\u00a0al. (2008) MILEPOST GCC: machine learning based research compiler. In: GCC summit"},{"issue":"3","key":"614_CR35","doi-asserted-by":"publisher","first-page":"296","DOI":"10.1007\/s10766-010-0161-2","volume":"39","author":"G Fursin","year":"2011","unstructured":"Fursin G, Kashnikov Y, Memon AW, Chamski Z, Temam O, Namolaru M, Yom-Tov E, Mendelson B, Zaks A, Courtois E et al (2011) Milepost gcc: machine learning enabled self-tuning compiler. Int J Parallel Prog 39(3):296\u2013327","journal-title":"Int J Parallel Prog"},{"key":"614_CR36","doi-asserted-by":"crossref","unstructured":"Gaussier E, Glesser D, Reis V, Trystram D (2015) Improving backfilling by using machine learning to predict running times. In: Proceedings of the international conference for high performance computing, networking, storage and analysis, ACM, p\u00a064","DOI":"10.1145\/2807591.2807646"},{"issue":"5","key":"614_CR37","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1109\/MC.2005.160","volume":"38","author":"D Geer","year":"2005","unstructured":"Geer D (2005) Chip makers turn to multicore processors. Computer 38(5):11\u201313. https:\/\/doi.org\/10.1109\/MC.2005.160","journal-title":"Computer"},{"key":"614_CR38","doi-asserted-by":"crossref","unstructured":"Gordon MI, Thies W, Amarasinghe S (2006) Exploiting coarse-grained task, data, and pipeline parallelism in stream programs. In: ACM SIGOPS Operating Systems Review, ACM 40:151\u2013162","DOI":"10.1145\/1168857.1168877"},{"key":"614_CR39","unstructured":"Gould N (2006) An introduction to algorithms for continuous optimization. 
Oxford University Computing Laboratory Notes"},{"key":"614_CR40","doi-asserted-by":"crossref","unstructured":"Grewe D, OBoyle MF (2011) A static task partitioning approach for heterogeneous systems using opencl. In: Compiler Construction, Springer, Berlin, pp 286\u2013305","DOI":"10.1007\/978-3-642-19861-8_16"},{"key":"614_CR41","doi-asserted-by":"crossref","unstructured":"Grewe D, Wang Z, O\u2019Boyle MF (2011) A workload-aware mapping approach for data-parallel programs. In: Proceedings of the 6th international conference on high performance and embedded architectures and compilers, ACM, pp 117\u2013126","DOI":"10.1145\/1944862.1944881"},{"key":"614_CR42","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7055.001.0001","volume-title":"Using MPI: portable parallel programming with the message-passing interface","author":"W Gropp","year":"1999","unstructured":"Gropp W, Lusk E, Skjellum A (1999) Using MPI: portable parallel programming with the message-passing interface, vol 1. MIT press, Cambridge"},{"key":"614_CR43","doi-asserted-by":"crossref","unstructured":"Grzonka D, Kolodziej J, Tao J (2014) Using artificial neural network for monitoring and supporting the grid scheduler performance. In: ECMS, pp 515\u2013522","DOI":"10.7148\/2014-0515"},{"key":"614_CR44","unstructured":"Grzonka D, Jak\u00f3bik A, Ko\u0142odziej J, Pllana S (2017) Using a multi-agent system and artificial intelligence for monitoring and improving the cloud performance and security. Future Generation Computer Systems. 10.1016\/j.future.2017.05.046, http:\/\/www.sciencedirect.com\/science\/article\/pii\/S0167739X17310531"},{"key":"614_CR45","first-page":"1157","volume":"3","author":"I Guyon","year":"2003","unstructured":"Guyon I, Elisseeff A (2003) An introduction to variable and feature selection. 
J Mach Learn Res 3:1157\u20131182","journal-title":"J Mach Learn Res"},{"key":"614_CR46","volume-title":"ICAC","author":"H Hoffmann","year":"2010","unstructured":"Hoffmann H, Eastep J, Santambrogio MD, Miller JE, Agarwal A (2010a) Application heartbeats: a generic interface for specifying program performance and goals in autonomous computing environments. In: Parashar M, Figueiredo RJO, Kiciman E (eds) ICAC. ACM, New York City"},{"key":"614_CR47","unstructured":"Hoffmann H, Maggio M, Santambrogio MD, Leva A, Agarwal A (2010b) SEEC: A framework for self-aware computing. http:\/\/hdl.handle.net\/1721.1\/59519"},{"key":"614_CR48","unstructured":"Iakymchuk R, Jordan H, Bo\u00a0Peng I, Markidis S, Laure E (2016) A particle-in-cell method for automatic load-balancing with the allscale environment. In: The Exascale applications & Software conference (EASC2016)"},{"key":"614_CR49","volume-title":"High Performance Parallelism Pearls Volume Two: Multicore and Many-core Programming Approaches","author":"J Jeffers","year":"2015","unstructured":"Jeffers J, Reinders J (2015) High Performance Parallelism Pearls Volume Two: Multicore and Many-core Programming Approaches. Morgan Kaufmann, Burlington"},{"key":"614_CR50","doi-asserted-by":"publisher","unstructured":"Jin C, de\u00a0Supinski BR, Abramson D, Poxon H, DeRose L, Dinh MN, Endrei M, Jessup ER (2016) A survey on software methods to improve the energy efficiency of parallel computing. In: The international journal of high performance computing applications p 1094342016665471. https:\/\/doi.org\/10.1177\/1094342016665471","DOI":"10.1177\/1094342016665471"},{"issue":"5","key":"614_CR51","doi-asserted-by":"publisher","first-page":"481","DOI":"10.1002\/cpe.1844","volume":"24","author":"C Kessler","year":"2012","unstructured":"Kessler C, L\u00f6we W (2012) Optimized composition of performance-aware parallel components. 
Concurr Comput Pract Exp 24(5):481\u2013498","journal-title":"Concurr Comput Pract Exp"},{"key":"614_CR52","doi-asserted-by":"crossref","unstructured":"Kessler C, Dastgeer U, Thibault S, Namyst R, Richards A, Dolinsky U, Benkner S, Tr\u00e4ff JL, Pllana S (2012) Programmability and performance portability aspects of heterogeneous multi-\/manycore systems. In: Design, automation & test in Europe conference & exhibition (DATE), 2012, IEEE, pp 1403\u20131408","DOI":"10.1109\/DATE.2012.6176582"},{"key":"614_CR53","unstructured":"Kitchenham B, Charters S (2007) Guidelines for performing systematic literature reviews in software engineering. In: Technical Report EBSE 2007-001, Keele University and Durham University Joint Report"},{"key":"614_CR54","unstructured":"Lee BD, Schopf JM (2003) Run-time prediction of parallel applications on shared environments. In: IEEE International conference on cluster computing, 2003. Proceedings. 2003, IEEE, pp 487\u2013491"},{"key":"614_CR55","unstructured":"Li L, Dastgeer U, Kessler C (2012) Adaptive off-line tuning for optimized composition of components for heterogeneous many-core systems. In: High performance computing for computational science-VECPAR 2012, Springer, pp 329\u2013345"},{"key":"614_CR56","doi-asserted-by":"crossref","unstructured":"Li M, Zeng L, Meng S, Tan J, Zhang L, Butt AR, Fuller N (2014) Mronline: Mapreduce online performance tuning. In: Proceedings of the 23rd international symposium on High-performance parallel and distributed computing, ACM, pp 165\u2013176","DOI":"10.1145\/2600212.2600229"},{"key":"614_CR57","doi-asserted-by":"crossref","unstructured":"Liu B, Zhao Y, Zhong X, Liang Z, Feng B (2013) A Novel Thread Partitioning Approach Based on Machine Learning for Speculative Multithreading. 
In: IEEE international conference on embedded and ubiquitous computing high performance computing and communications & 2013 (HPCC_EUC), 2013 IEEE 10th International Conference on, IEEE, pp 826\u2013836","DOI":"10.1109\/HPCC.and.EUC.2013.119"},{"key":"614_CR58","doi-asserted-by":"publisher","unstructured":"Luk CK, Hong S, Kim H (2009) Qilin: Exploiting parallelism on heterogeneous multiprocessors with adaptive mapping. In: Proceedings of the 42nd annual IEEE\/ACM international symposium on microarchitecture, ACM, New York, NY, USA, MICRO 42, pp 45\u201355. https:\/\/doi.org\/10.1145\/1669112.1669121","DOI":"10.1145\/1669112.1669121"},{"key":"614_CR59","doi-asserted-by":"publisher","unstructured":"Malawski M, Juve G, Deelman E, Nabrzyski J (2015) Algorithms for cost- and deadline-constrained provisioning for scientific workflow ensembles in iaas clouds. Future Generation Computer Systems 48:1\u201318. https:\/\/doi.org\/10.1016\/j.future.2015.01.004 , special Section: Business and Industry Specific Cloud","DOI":"10.1016\/j.future.2015.01.004"},{"key":"614_CR60","unstructured":"Mantripragada K, Binotto APD, Tizzei LP (2014) A self-adaptive auto-scaling method for scientific applications on HPC environments and clouds. CoRR abs\/1412.6392"},{"key":"614_CR61","doi-asserted-by":"publisher","unstructured":"Mastelic T, Fdhila W, Brandic I, Rinderle-Ma S (2015) Predicting resource allocation and costs for business processes in the cloud. In: 2015 IEEE world congress on services, pp 47\u201354. https:\/\/doi.org\/10.1109\/SERVICES.2015.16","DOI":"10.1109\/SERVICES.2015.16"},{"key":"614_CR62","doi-asserted-by":"publisher","unstructured":"Memeti S, Pllana S (2016a) Combinatorial optimization of dna sequence analysis on heterogeneous systems. Concurrency and computation: practice and experience pp n\/a-n\/a. 
https:\/\/doi.org\/10.1002\/cpe.4037 , cpe.4037","DOI":"10.1002\/cpe.4037"},{"key":"614_CR63","doi-asserted-by":"publisher","unstructured":"Memeti S, Pllana S (2016b) Combinatorial optimization of work distribution on heterogeneous systems. In: 2016 45th international conference on parallel processing workshops (ICPPW), pp 151\u2013160. https:\/\/doi.org\/10.1109\/ICPPW.2016.35","DOI":"10.1109\/ICPPW.2016.35"},{"key":"614_CR64","doi-asserted-by":"publisher","unstructured":"Memeti S, Pllana S (2016c) A machine learning approach for accelerating dna sequence analysis. The International Journal of High Performance Computing Applications 0(0):1094342016654,214. https:\/\/doi.org\/10.1177\/1094342016654214","DOI":"10.1177\/1094342016654214"},{"key":"614_CR65","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/978-3-319-44881-7_14","volume-title":"Optimal worksharing of DNA sequence analysis on accelerated platforms","author":"S Memeti","year":"2016","unstructured":"Memeti S, Pllana S, Ko\u0142odziej J (2016) Optimal worksharing of DNA sequence analysis on accelerated platforms. Springer, Cham, pp 279\u2013309. https:\/\/doi.org\/10.1007\/978-3-319-44881-7_14"},{"key":"614_CR66","doi-asserted-by":"publisher","unstructured":"Memeti S, Li L, Pllana S, Kolodziej J, Kessler C (2017) Benchmarking opencl, openacc, openmp, and cuda: Programming productivity, performance, and energy consumption. In: Proceedings of the 2017 workshop on adaptive resource management and scheduling for cloud computing, ACM, New York, NY, USA, ARMS-CC \u201917, pp 1\u20136. https:\/\/doi.org\/10.1145\/3110355.3110356","DOI":"10.1145\/3110355.3110356"},{"key":"614_CR67","volume-title":"Machine learning","author":"TM Mitchell","year":"1997","unstructured":"Mitchell TM (1997) Machine learning, 1st edn. 
McGraw-Hill Inc, New York, NY, USA","edition":"1"},{"issue":"4","key":"614_CR68","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1145\/2788396","volume":"47","author":"S Mittal","year":"2015","unstructured":"Mittal S, Vetter JS (2015) A survey of cpu-gpu heterogeneous computing techniques. ACM Comput Surv (CSUR) 47(4):69","journal-title":"ACM Comput Surv (CSUR)"},{"key":"614_CR69","unstructured":"Monsifrot A, Bodin F, Quiniou R (2002) A machine learning approach to automatic production of compiler heuristics. In: Artificial intelligence: methodology, systems, and applications, Springer, pp 41\u201350"},{"key":"614_CR70","first-page":"18","volume":"120","author":"C Nvidia","year":"2015","unstructured":"Nvidia C (2015) CUDA C programming guide. NVIDIA Corp 120:18","journal-title":"NVIDIA Corp"},{"key":"614_CR71","doi-asserted-by":"crossref","unstructured":"Ogilvie W, Petoumenos P, Wang Z, Leather H (2015) Intelligent heuristic construction with active learning. In: Compilers for parallel computing (CPC\u201915). London, United Kingdom","DOI":"10.1007\/978-3-319-17473-0_10"},{"key":"614_CR72","unstructured":"OpenMP A (2013) OpenMP 4.0 specification, June 2013"},{"key":"614_CR73","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-09766-4","volume-title":"Encyclopedia of parallel computing","author":"D Padua","year":"2011","unstructured":"Padua D (2011) Encyclopedia of parallel computing. Springer, Berlin"},{"key":"614_CR74","doi-asserted-by":"crossref","unstructured":"Page AJ, Naughton TJ (2005a) Dynamic task scheduling using genetic algorithms for heterogeneous distributed computing. 
In: 19th International parallel and distributed processing symposium, IEEE, pp 189a\u2013189a","DOI":"10.1109\/IPDPS.2005.184"},{"issue":"3","key":"614_CR75","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s10462-005-9002-x","volume":"24","author":"AJ Page","year":"2005","unstructured":"Page AJ, Naughton TJ (2005b) Framework for task scheduling in heterogeneous distributed computing using genetic algorithms. Artif Intell Rev 24(3):415\u2013429. https:\/\/doi.org\/10.1007\/s10462-005-9002-x","journal-title":"Artif Intell Rev"},{"key":"614_CR76","unstructured":"Park Yw, Baskiyar S, Casey K (2010) A novel adaptive support vector machine based task scheduling. In: Proceedings the 9th International Conference on Parallel and Distributed Computing and Networks, Austria, pp 16\u201318"},{"key":"614_CR77","doi-asserted-by":"crossref","unstructured":"Pekhimenko G, Brown AD (2011) Efficient program compilation through machine learning techniques. In: Software Automatic Tuning, Springer, pp 335\u2013351","DOI":"10.1007\/978-1-4419-6935-4_19"},{"key":"614_CR78","doi-asserted-by":"crossref","unstructured":"Pllana S, Benkner S, Mehofer E, Natvig L, Xhafa F (2008) Towards an intelligent environment for programming multi-core computing systems. Euro-Par Workshops, Springer, Lecture Notes in Computer Science 5415:141\u2013151","DOI":"10.1007\/978-3-642-00955-6_19"},{"key":"614_CR79","volume-title":"Numerical recipes 3rd edition: the art of scientific computing","author":"WH Press","year":"2007","unstructured":"Press WH, Teukolsky SA, Vetterling WT, Flannery BP (2007) Numerical recipes 3rd edition: the art of scientific computing, 3rd edn. Cambridge University Press, Cambridge","edition":"3"},{"key":"614_CR80","unstructured":"Ravi VT, Agrawal G (2011) A dynamic scheduling framework for emerging heterogeneous systems. 
In: 18th International conference on high performance computing (HiPC), 2011, IEEE, pp 1\u201310"},{"key":"614_CR81","doi-asserted-by":"crossref","unstructured":"Rossbach CJ, Yu Y, Currey J, Martin JP, Fetterly D (2013) Dandelion: a compiler and runtime for heterogeneous systems. In: Proceedings of the twenty-fourth ACM symposium on operating systems principles, ACM, pp 49\u201368","DOI":"10.1145\/2517349.2522715"},{"key":"614_CR82","doi-asserted-by":"publisher","unstructured":"Sadashiv N, Kumar SMD (2011) Cluster, grid and cloud computing: A detailed comparison. In: 2011 6th International conference on computer science education (ICCSE), pp 477\u2013482. https:\/\/doi.org\/10.1109\/ICCSE.2011.6028683","DOI":"10.1109\/ICCSE.2011.6028683"},{"issue":"1\u20132","key":"614_CR83","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1016\/j.parco.2011.10.008","volume":"38","author":"M Sandrieser","year":"2012","unstructured":"Sandrieser M, Benkner S, Pllana S (2012) Using explicit platform descriptions to support programming of heterogeneous many-core systems. Parallel Comput 38(1\u20132):52\u201356","journal-title":"Parallel Comput"},{"key":"614_CR84","doi-asserted-by":"publisher","unstructured":"Silvano C, Agosta G, Cherubin S, Gadioli D, Palermo G, Bartolini A, Benini L, Martinovi\u010d J, Palkovi\u010d M, Slaninov\u00e1 K, Bispo Ja, Cardoso JaMP, Abreu R, Pinto P, Cavazzoni C, Sanna N, Beccari AR, Cmar R, Rohou E (2016) The antarex approach to autotuning and adaptivity for energy efficient hpc systems. In: Proceedings of the international conference on computing frontiers, ACM, New York, NY, USA, CF \u201916, pp 288\u2013293. 
https:\/\/doi.org\/10.1145\/2903150.2903470","DOI":"10.1145\/2903150.2903470"},{"issue":"4","key":"614_CR85","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1504\/IJBIC.2009.024726","volume":"1","author":"SN Sivanandam","year":"2009","unstructured":"Sivanandam SN, Visalakshi P (2009) Dynamic task scheduling with load balancing using parallel orthogonal particle swarm optimisation. Int J Bio-Inspired Comput 1(4):276\u2013286. https:\/\/doi.org\/10.1504\/IJBIC.2009.024726","journal-title":"Int J Bio-Inspired Comput"},{"issue":"5","key":"614_CR86","doi-asserted-by":"publisher","first-page":"1164","DOI":"10.1016\/j.future.2013.01.005","volume":"29","author":"S Smanchat","year":"2013","unstructured":"Smanchat S, Indrawan M, Ling S, Enticott C, Abramson D (2013) Scheduling parameter sweep workflow in the grid based on resource competition. Future Gen Comput Syst 29(5):1164\u20131183. https:\/\/doi.org\/10.1016\/j.future.2013.01.005","journal-title":"Future Gen Comput Syst"},{"key":"614_CR87","doi-asserted-by":"crossref","unstructured":"Stephenson M, Amarasinghe S (2005) Predicting unroll factors using supervised classification. In: International Symposium on code generation and optimization, 2005. CGO 2005, IEEE, pp 123\u2013134","DOI":"10.1109\/CGO.2005.29"},{"issue":"5","key":"614_CR88","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1145\/780822.781141","volume":"38","author":"M Stephenson","year":"2003","unstructured":"Stephenson M, Amarasinghe S, Martin M, O\u2019Reilly UM (2003) Meta optimization: improving compiler heuristics with machine learning. SIGPLAN Not 38(5):77\u201390","journal-title":"SIGPLAN Not"},{"key":"614_CR89","unstructured":"Sterling T, Becker DJ, Savarese D, Dorband JE, Ranawake UA, Packer CV (1995) Beowulf: A parallel workstation for scientific computation. 
In: Proceedings of the 24th international conference on parallel processing, pp 11\u201314"},{"issue":"1\u20133","key":"614_CR90","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1109\/MCSE.2010.69","volume":"12","author":"JE Stone","year":"2010","unstructured":"Stone JE, Gohara D, Shi G (2010) OpenCL: a parallel programming standard for heterogeneous computing systems. Comput Sci Eng 12(1\u20133):66\u201373","journal-title":"Comput Sci Eng"},{"key":"614_CR91","doi-asserted-by":"crossref","unstructured":"Thomas N, Tanase G, Tkachyshyn O, Perdue J, Amato NM, Rauchwerger L (2005) A framework for adaptive algorithm selection in STAPL. In: Proceedings of the tenth ACM SIGPLAN symposium on principles and practice of parallel programming, ACM, pp 277\u2013288","DOI":"10.1145\/1065944.1065981"},{"key":"614_CR92","doi-asserted-by":"publisher","unstructured":"Tiwari A, Hollingsworth JK (2011) Online adaptive code generation and tuning. In: 2011 IEEE international parallel distributed processing symposium, pp 879\u2013892. https:\/\/doi.org\/10.1109\/IPDPS.2011.86","DOI":"10.1109\/IPDPS.2011.86"},{"key":"614_CR93","doi-asserted-by":"publisher","unstructured":"Tiwari A, Chen C, Chame J, Hall M, Hollingsworth JK (2009) A scalable auto-tuning framework for compiler optimization. In: Proceedings of the 2009 IEEE international symposium on parallel & distributed processing, IEEE Computer Society, Washington, DC, USA, IPDPS \u201909, pp 1\u201312. https:\/\/doi.org\/10.1109\/IPDPS.2009.5161054","DOI":"10.1109\/IPDPS.2009.5161054"},{"key":"614_CR94","unstructured":"TOP500 (2016) TOP500 Supercomputer Sites. http:\/\/www.top500.org\/ . Accessed Jan 2016"},{"key":"614_CR95","doi-asserted-by":"crossref","unstructured":"Tournavitis G, Wang Z, Franke B, O\u2019Boyle MF (2009) Towards a holistic approach to auto-parallelization: integrating profile-driven parallelism detection and machine-learning based mapping. 
In: ACM Sigplan notices 44:177\u2013187","DOI":"10.1145\/1542476.1542496"},{"key":"614_CR96","doi-asserted-by":"publisher","unstructured":"Viebke A, Pllana S (2015) The potential of the intel (r) xeon phi for supervised deep learning. In: 2015 IEEE 17th international conference on high performance computing and communications (HPCC), pp 758\u2013765. https:\/\/doi.org\/10.1109\/HPCC-CSS-ICESS.2015.45","DOI":"10.1109\/HPCC-CSS-ICESS.2015.45"},{"issue":"1","key":"614_CR97","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1109\/MS.2011.12","volume":"28","author":"M Voss","year":"2011","unstructured":"Voss M, Kim W (2011) Multicore desktop programming with intel threading building blocks. IEEE Softw 28(1):23\u201331. https:\/\/doi.org\/10.1109\/MS.2011.12","journal-title":"IEEE Softw"},{"key":"614_CR98","doi-asserted-by":"crossref","unstructured":"Wang Z, O\u2019Boyle MF (2009) Mapping parallelism to multi-cores: a machine learning based approach. In: ACM Sigplan notices, ACM 44:75\u201384","DOI":"10.1145\/1594835.1504189"},{"issue":"3","key":"614_CR99","first-page":"20","volume":"10","author":"Z Wang","year":"2013","unstructured":"Wang Z, O\u2019boyle MF (2013) Using machine learning to partition streaming programs. ACM Trans Archit Code Optim (TACO) 10(3):20","journal-title":"ACM Trans Archit Code Optim (TACO)"},{"key":"614_CR100","doi-asserted-by":"crossref","unstructured":"Wienke S, Springer P, Terboven C, an\u00a0Mey D (2012) Openacc: First experiences with real-world applications. In: Proceedings of the 18th international conference on parallel processing, Springer-Verlag, Berlin, Heidelberg, Euro-Par\u201912, pp 859\u2013870","DOI":"10.1007\/978-3-642-32820-6_85"},{"key":"614_CR101","volume-title":"Integer and combinatorial optimization","author":"LA Wolsey","year":"2014","unstructured":"Wolsey LA, Nemhauser GL (2014) Integer and combinatorial optimization. 
Wiley, Hoboken"},{"key":"614_CR102","unstructured":"Zhang Y, Burcea M, Cheng V, Ho R, Voss M (2004) An adaptive openmp loop scheduler for hyperthreaded smps. In: ISCA PDCS, pp 256\u2013263"},{"key":"614_CR103","unstructured":"Zhang Y, Voss M, Rogers E (2005) Runtime empirical selection of loop schedulers on hyperthreaded smps. In: Proceedings of 19th IEEE International parallel and distributed processing symposium, 2005, IEEE, pp 44b\u201344b"},{"issue":"9","key":"614_CR104","doi-asserted-by":"publisher","first-page":"899","DOI":"10.1109\/71.954620","volume":"12","author":"AY Zomaya","year":"2001","unstructured":"Zomaya AY, Teh YH (2001) Observations on using genetic algorithms for dynamic load-balancing. IEEE Trans Parallel Distrib Syst 12(9):899\u2013911","journal-title":"IEEE Trans Parallel Distrib Syst"},{"key":"614_CR105","unstructured":"Zomaya AY, Lee RC, Olariu S (2001) An introduction to genetic-based scheduling in parallel processor systems. Solutions to Parallel and Distributed Computing Problems pp 
111\u2013133"}],"container-title":["Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-018-0614-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00607-018-0614-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-018-0614-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T23:24:58Z","timestamp":1751585098000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00607-018-0614-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4,26]]},"references-count":105,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2019,8]]}},"alternative-id":["614"],"URL":"https:\/\/doi.org\/10.1007\/s00607-018-0614-9","relation":{},"ISSN":["0010-485X","1436-5057"],"issn-type":[{"value":"0010-485X","type":"print"},{"value":"1436-5057","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,4,26]]},"assertion":[{"value":"13 December 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 April 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 April 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}