{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,6,4]],"date-time":"2024-06-04T23:16:21Z","timestamp":1717542981307},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2015,6,10]],"date-time":"2015-06-10T00:00:00Z","timestamp":1433894400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2015,9]]},"DOI":"10.1007\/s11227-015-1449-1","type":"journal-article","created":{"date-parts":[[2015,6,10]],"date-time":"2015-06-10T11:26:30Z","timestamp":1433935590000},"page":"3567-3592","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Design space exploration of hardware task superscalar architecture"],"prefix":"10.1007","volume":"71","author":[{"given":"Fahimeh","family":"Yazdanpanah","sequence":"first","affiliation":[]},{"given":"Mohammad","family":"Alaei","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,6,10]]},"reference":[{"key":"1449_CR1","doi-asserted-by":"crossref","unstructured":"Al-Kadi G, Terechko AS (2009) A hardware task scheduler for embedded video processing. In: Proceedings of the international conference on high performance and embedded architectures and compilers (HiPEAC), pp 140\u2013152","DOI":"10.1007\/978-3-540-92990-1_12"},{"key":"1449_CR2","doi-asserted-by":"crossref","unstructured":"Badia RM (2011) Top down programming methodology and tools with StarSs, enabling scalable programming paradigms: extended abstract. In: Proceedings of the workshop on scalable algorithms for large-scale systems (ScalA), pp 19\u201320","DOI":"10.1145\/2133173.2133182"},{"issue":"1\u20132","key":"1449_CR3","first-page":"77","volume":"17","author":"P Bellens","year":"2009","unstructured":"Bellens P, Perez JM, Cabarcas F, Ramirez A, Badia RM, Labarta J (2009) CellSs: scheduling techniques to better exploit memory hierarchy. Sci Program 17(1\u20132):77\u201395","journal-title":"Sci Program"},{"key":"1449_CR4","doi-asserted-by":"crossref","unstructured":"Bellens P, Perez J, Badia R, Labarta J (2006) CellSs: a programming model for the cell BE architecture. In: Proceedings of the supercomputing (SC). ACM, New York","DOI":"10.1109\/SC.2006.17"},{"key":"1449_CR5","unstructured":"Bsc application repository, bar (2014). In: Barcelona Supercomputing Center (BSC). https:\/\/pm.bsc.es\/projects\/bar . Accessed 06 Feb 2014"},{"key":"1449_CR6","doi-asserted-by":"crossref","unstructured":"Bueno J, Martinell L, Duran A, Farreras M, Martorell X, Badia RM, Ayguade E, Labarta J (2011) Productive cluster programming with OmpSs. In: Proceedings of the International conference on parallel processing (Euro-Par), pp 555\u2013566","DOI":"10.1007\/978-3-642-23400-2_52"},{"key":"1449_CR7","doi-asserted-by":"crossref","unstructured":"Castrillon J, Zhang D, Kempf T, Vanthournout B, Leupers R, Ascheid G (2009) Task management in MPSoCs: an ASIP approach. In: Proceedings of the international conference on computer-aided design (ICCAD), pp 587\u2013594","DOI":"10.1145\/1687399.1687508"},{"issue":"2","key":"1449_CR8","doi-asserted-by":"crossref","first-page":"173","DOI":"10.1142\/S0129626411000151","volume":"21","author":"A Duran","year":"2011","unstructured":"Duran A, Ayguade E, Badia RM, Labarta J, Martinell L, Martorell X, Planas J (2011) Ompss: a proposal for programming heterogeneous multi-core architectures. Parallel Process Lett 21(2):173\u2013193","journal-title":"Parallel Process Lett"},{"key":"1449_CR9","doi-asserted-by":"crossref","unstructured":"Etsion Y, Cabarcas F, Rico A, Ramirez A, Badia RM, Ayguade E, Labarta J, Valero M (2010) Task superscalar: an out-of-order task pipeline. In: Proceedings of the international symposium on microarchitecture (MICRO), pp 89\u2013100","DOI":"10.1109\/MICRO.2010.13"},{"key":"1449_CR10","unstructured":"Etsion Y, Ramirez A, Badia RM, Ayguade E, Labarta J, Valero M (2010) Task superscalar: using processors as functional units. In: Proceedings of the hot topics in parallelism (HOTPAR)"},{"key":"1449_CR11","doi-asserted-by":"crossref","unstructured":"Hoogerbrugge J, Terechko A (2011) A multithreaded multicore system for embedded media processing. Trans High-Perform Embedded Archit Compil (THEA) 3(2):154\u2013173 (2011)","DOI":"10.1007\/978-3-642-19448-1_9"},{"key":"1449_CR12","unstructured":"Jenista JC, Eom YH, Demsky B (2010) OoOJava: an out-of-order approach to parallel programming. In: Proceedings of the USENIX conference on hot topic in parallelism (HotPar), pp 11\u201311"},{"key":"1449_CR13","doi-asserted-by":"crossref","unstructured":"Jenista JC, Eom YH, Demsky BC (2011) OoOJava: software out-of-order execution. In: Proceedings of the ACM symposium on principles and practice of parallel programming (PPoPP), pp 57\u201368","DOI":"10.1145\/1941553.1941563"},{"issue":"4","key":"1449_CR14","doi-asserted-by":"crossref","first-page":"671","DOI":"10.1109\/TVLSI.2009.2014068","volume":"18","author":"R Kalra","year":"2010","unstructured":"Kalra R, Lysecky R (2010) Configuration locking and schedulability estimation for reduced reconfiguration overheads of reconfigurable systems. IEEE Trans Very Large Scale Integr Sys 18(4):671\u2013674","journal-title":"IEEE Trans Very Large Scale Integr Sys"},{"key":"1449_CR15","doi-asserted-by":"crossref","first-page":"144","DOI":"10.1016\/S0375-9601(02)01365-8","volume":"305","author":"LB Kish","year":"2002","unstructured":"Kish LB (2002) End of Moore\u2019s law: thermal (noise) death of integration in micro and nano electronics. Phys Lett A 305:144\u2013149","journal-title":"Phys Lett A"},{"issue":"2","key":"1449_CR16","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1049\/ip-cds:20040434","volume":"151","author":"LB Kish","year":"2004","unstructured":"Kish LB (2004) Moore\u2019s law and the energy requirement of computing versus performance. IEE Proc Circuits Dev Syst 151(2):190\u2013194","journal-title":"IEE Proc Circuits Dev Syst"},{"key":"1449_CR17","doi-asserted-by":"crossref","unstructured":"Kumar S, Hughes CJ, Nguyen A (2007) Carbon: Architectural support for fine-grained parallelism on chip multiprocessors. In: Proceedings of the international symposium on computer architecture (ISCA), pp 162\u2013173","DOI":"10.1145\/1250662.1250683"},{"key":"1449_CR18","doi-asserted-by":"crossref","unstructured":"Lam MS, Rinard MC (1991) Coarse-grain parallel programming in Jade. In: Proceedings of the ACM symposium on principles and practice of parallel programming (PPoPP). ACM, New York, pp 94\u2013105","DOI":"10.1145\/109625.109636"},{"issue":"2","key":"1449_CR19","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1109\/MM.2008.31","volume":"28","author":"E Lindholm","year":"2008","unstructured":"Lindholm E, Nickolls J, Oberman S, Montrym J (2008) NVIDIA Tesla: a unified graphics and computing architecture. IEEE Micro 28(2):39\u201355","journal-title":"IEEE Micro"},{"key":"1449_CR20","doi-asserted-by":"crossref","unstructured":"Meenderinck C, Juurlink B (2010) A case for hardware task management support for the StarSs programming model. In: Proceedings of the conference on digital system design (DSD), pp 347\u2013354","DOI":"10.1109\/DSD.2010.63"},{"key":"1449_CR21","doi-asserted-by":"crossref","unstructured":"Meenderinck C, Juurlink B (2011) Nexus: hardware support for task-based programming. In: Proceedings of the conference on digital system design (DSD), pp 442\u2013445","DOI":"10.1109\/DSD.2011.62"},{"key":"1449_CR22","doi-asserted-by":"crossref","unstructured":"Nacul AC, Regazzoni F, Lajolo M (2007) Hardware scheduling support in SMP architectures. In: Proceedings of the conference on design, automation and test in Europe (DATE), pp 642\u2013647","DOI":"10.1109\/DATE.2007.364666"},{"key":"1449_CR23","doi-asserted-by":"crossref","unstructured":"Noguera J, Badia RM (2003) System-level power-performance trade-offs in task scheduling for dynamically reconfigurable architectures. In: Proceedings of the international conference on compilers, architectures and synthesis for embedded systems (CASES), pp 73\u201383","DOI":"10.1145\/951710.951722"},{"issue":"2","key":"1449_CR24","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1145\/993396.993404","volume":"3","author":"J Noguera","year":"2004","unstructured":"Noguera J, Badia RM (2004) Multitasking on reconfigurable architectures: microarchitecture support and dynamic scheduling. ACM Trans Embedded Comput Syst 3(2):385\u2013406","journal-title":"ACM Trans Embedded Comput Syst"},{"key":"1449_CR25","unstructured":"Openmp application program interface, version 4.0 (2013). www.openmp.org\/ . Accessed 06 Feb 2014"},{"issue":"9","key":"1449_CR26","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1587\/elex.5.296","volume":"5","author":"S Park","year":"2008","unstructured":"Park S (2008) A hardware operating system kernel for multi processors. IEICE Electron Express 5(9):296\u2013302","journal-title":"IEICE Electron Express"},{"issue":"6","key":"1449_CR27","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1145\/78973.78978","volume":"33","author":"PK Pearson","year":"1990","unstructured":"Pearson PK (1990) Fast hashing of variable-length text strings. Commun ACM 33(6):677\u2013680","journal-title":"Commun ACM"},{"key":"1449_CR28","doi-asserted-by":"crossref","unstructured":"Perez, Badia RM, Labarta J (2008) A dependency-aware task-based programming environment for multi-core architectures. In: Proceedings of the international conference on cluster computing (CC), pp 142\u2013151","DOI":"10.1109\/CLUSTR.2008.4663765"},{"issue":"3","key":"1449_CR29","doi-asserted-by":"crossref","first-page":"483","DOI":"10.1145\/291889.291893","volume":"20","author":"MC Rinard","year":"1998","unstructured":"Rinard MC, Lam MS (1998) The design, implementation, and evaluation of Jade. ACM Trans Program Lang Syst (TPLS) 20(3):483\u2013545","journal-title":"ACM Trans Program Lang Syst (TPLS)"},{"key":"1449_CR30","doi-asserted-by":"crossref","unstructured":"Rinard MC, Scales DJ, Lam MS (1992) Heterogeneous parallel programming in Jade. In: Proceedings of the conference on supercomputing, pp 245\u2013256","DOI":"10.1109\/SUPERC.1992.236678"},{"issue":"6","key":"1449_CR31","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1109\/2.214440","volume":"26","author":"MC Rinard","year":"1993","unstructured":"Rinard MC, Scales DJ, Lam MS (1993) Jade: a high-level, machine-independent language for parallel programming. Computer 26(6):28\u201338","journal-title":"Computer"},{"key":"1449_CR32","doi-asserted-by":"crossref","unstructured":"Saez S, Vila J, Crespo A, Garcia A (1999) A hardware scheduler for complex real time system. In: Proceedings of the IEEE international symposium industrial electronics (ISIE). IEEE, pp 43\u201348","DOI":"10.1109\/ISIE.1999.801754"},{"key":"1449_CR33","doi-asserted-by":"crossref","unstructured":"Sjalander M, Terechko A, Duranton M (2008) A look-ahead task management unit for embedded multi-core architectures. In: Proceedings of the conference on digital system design (DSD), pp 149\u2013157","DOI":"10.1109\/DSD.2008.45"},{"key":"1449_CR34","doi-asserted-by":"crossref","unstructured":"Yazdanpanah F, Alvarez C, Jimenez-Gonalez D, Badia RM, Valero M (2015) Picos: a hardware runtime architecture support for ompss. Future Gener Comput Syst","DOI":"10.1016\/j.future.2014.12.010"},{"key":"1449_CR35","doi-asserted-by":"crossref","unstructured":"Yazdanpanah F, Jimenez-Gonzalez D, Alvarez-Martinez C, Etsion Y (2013) Hybrid dataflow\/von-Neumann architectures. IEEE Trans Parallel Distrib Syst (TPDS) 25(6):1489\u20131509","DOI":"10.1109\/TPDS.2013.125"},{"key":"1449_CR36","doi-asserted-by":"crossref","unstructured":"Yazdanpanah F, Jimenez-Gonzalez D, Alvarez-Martinez C, Etsion Y, Badia RM (2013) Analysis of the task superscalar architecture hardware design. In: Proceedings of the international conference on computational science (ICCS)","DOI":"10.1016\/j.procs.2013.05.197"},{"key":"1449_CR37","doi-asserted-by":"crossref","unstructured":"Yazdanpanah F, Jimenez-Gonzalez D, Alvarez-Martinez C, Etsion Y, Badia RM (2013) FPGA-based prototype of the task superscalar architecture. In: Proceedings of the 7th HiPEAC workshop of reconfigurable computing (WRC)","DOI":"10.1016\/j.procs.2013.05.197"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1449-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-015-1449-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1449-1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,26]],"date-time":"2019-08-26T08:30:52Z","timestamp":1566808252000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-015-1449-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,6,10]]},"references-count":37,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2015,9]]}},"alternative-id":["1449"],"URL":"https:\/\/doi.org\/10.1007\/s11227-015-1449-1","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,6,10]]}}}