{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,1,11]],"date-time":"2023-01-11T22:50:45Z","timestamp":1673477445973},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2015,2,8]],"date-time":"2015-02-08T00:00:00Z","timestamp":1423353600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2015,6]]},"DOI":"10.1007\/s11227-015-1383-2","type":"journal-article","created":{"date-parts":[[2015,2,7]],"date-time":"2015-02-07T02:56:22Z","timestamp":1423277782000},"page":"2309-2338","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Architectural support for task scheduling: hardware scheduling for dataflow on NUMA systems"],"prefix":"10.1007","volume":"71","author":[{"given":"Behram","family":"Khan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel","family":"Goodman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Salman","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Will","family":"Toms","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Paolo","family":"Faraboschi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mikel","family":"Luj\u00e1n","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ian","family":"Watson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,2,8]]},"reference":[{"key":"1383_CR1","doi-asserted-by":"crossref","unstructured":"Dagum L, Menon R (1998) OpenMP: an industry-standard API for shared-memory programming. In: IEEE computer science engineering, vol 5. IEEE Computer Society Press, Los Alamitos. doi: 10.1109\/99.660313","DOI":"10.1109\/99.660313"},{"key":"1383_CR2","doi-asserted-by":"crossref","unstructured":"Blumofe RD, Joerg CF, Kuszmaul BC, Leiserson CE, Randall KH, Zhou Y (1995) Cilk: an efficient multithreaded runtime system. In: Proceedings of the 5th ACM SIGPLAN symposium on principles and practice of parallel programming, PPOPP \u201995. ACM, New York. doi: 10.1145\/209936.209958","DOI":"10.1145\/209936.209958"},{"key":"1383_CR3","volume-title":"Intel threading building blocks","author":"J Reinders","year":"2007","unstructured":"Reinders J (2007) Intel threading building blocks, 1st edn. O\u2019Reilly & Associates Inc, Sebastopol","edition":"1"},{"key":"1383_CR4","doi-asserted-by":"crossref","unstructured":"Nickolls J, Buck I, Garland M, Skadron K (2008) Scalable parallel programming with CUDA. In: Queue, vol 6. ACM, New York. doi: 10.1145\/1365490.1365500","DOI":"10.1145\/1365490.1365500"},{"key":"1383_CR5","doi-asserted-by":"crossref","unstructured":"Stone JE, Gohara D, Shi G (2010) OpenCL: a parallel programming standard for heterogeneous computing systems. In: IEEE design test, vol 12. IEEE Computer Society Press, Los Alamitos. doi: 10.1109\/MCSE.2010.69","DOI":"10.1109\/MCSE.2010.69"},{"key":"1383_CR6","doi-asserted-by":"crossref","unstructured":"Thies W, Karczmarek M, Amarasinghe SP (2002) StreamIt: a language for streaming applications. In: Proceedings of the 11th international conference on compiler construction, CC \u201902. Springer-Verlag, London. http:\/\/dl.acm.org\/citation.cfm?id=647478.727935","DOI":"10.1007\/3-540-45937-5_14"},{"key":"1383_CR7","unstructured":"Jenista JC, Eom YH, Demsky B (2010) OoOJava: an out-of-order approach to parallel programming. In: Proceedings of the 2nd USENIX conference on hot topics in parallelism, HotPar\u201910. USENIX Association, Berkeley. http:\/\/dl.acm.org\/citation.cfm?id=1863086.1863097"},{"key":"1383_CR8","doi-asserted-by":"crossref","unstructured":"Perez JM, Badia RM, Labarta J (2008) A dependency-aware task-based programming environment for multi-core architectures. In: Proceedings of the 2008 IEEE international conference on cluster computing","DOI":"10.1109\/CLUSTR.2008.4663765"},{"key":"1383_CR9","unstructured":"Watson I et al (2010) The TERAFLUX project. http:\/\/www.teraflux.org . Accessed 1 Jan 2015"},{"key":"1383_CR10","doi-asserted-by":"crossref","unstructured":"Gurd JR, Kirkham CC, Watson I (1985) The manchester prototype dataflow computer. In: Communication ACM, vol 28. ACM, New York. doi: 10.1145\/2465.2468","DOI":"10.1145\/2465.2468"},{"key":"1383_CR11","doi-asserted-by":"crossref","unstructured":"Papadopoulos GM, Culler DE (1990) Monsoon: an explicit token-store architecture. In: Proceedings of the 17th annual international symposium on computer architecture, ISCA \u201990. ACM, New York. doi: 10.1145\/325164.325117","DOI":"10.1145\/325164.325117"},{"key":"1383_CR12","doi-asserted-by":"crossref","unstructured":"Cann D (1992) Retire fortran?: a debate rekindled. In: Communication ACM, vol 35. ACM, New York. doi: 10.1145\/135226.135231","DOI":"10.1145\/135226.135231"},{"key":"1383_CR13","doi-asserted-by":"crossref","unstructured":"Watson I, Woods V, Watson P, Banach R, Greenberg M, Sargeant J (1988) Flagship: a parallel architecture for declarative programming. In: Proceedings of the 15th annual international symposium on computer architecture, ISCA \u201988. IEEE Computer Society Press, Los Alamitos. http:\/\/dl.acm.org\/citation.cfm?id=52400.52415","DOI":"10.1109\/ISCA.1988.5221"},{"key":"1383_CR14","doi-asserted-by":"crossref","unstructured":"Darlington J, Reeve M (1981) ALICE a multi-processor reduction machine for the parallel evaluation CF applicative languages. In: Proceedings of the 1981 conference on functional programming languages and computer architecture, FPCA \u201981. ACM, New York. doi: 10.1145\/800223.806764","DOI":"10.1145\/800223.806764"},{"key":"1383_CR15","doi-asserted-by":"crossref","unstructured":"Peyton Jones SL, Clack C, Salkild J, Hardie M (1987) GRIP&Mdash; a high-performance architecture for parallel graph reduction. In: Proceedings of a conference on functional programming languages and computer architecture. Springer-Verlag, London. http:\/\/dl.acm.org\/citation.cfm?id=36583.36590","DOI":"10.1007\/3-540-18317-5_7"},{"key":"1383_CR16","doi-asserted-by":"crossref","unstructured":"Dean J, Ghemawat S (2008) MapReduce: simplified data processing on large clusters. In: Communication ACM, vol 51. ACM, New York. doi: 10.1145\/1327452.1327492","DOI":"10.1145\/1327452.1327492"},{"key":"1383_CR17","unstructured":"Peng D, Dabek F (2010) Large-scale incremental processing using distributed transactions and notifications. In: Proceedings of the 9th USENIX conference on operating systems design and implementation, OSDI\u201910. USENIX Association, Berkeley. http:\/\/dl.acm.org\/citation.cfm?id=1924943.1924961"},{"key":"1383_CR18","doi-asserted-by":"crossref","unstructured":"Goodman D, Khan S, Seaton C, Guskov Y, Khan B, Lujan M, Watson I (2012) DFScala: high level dataflow support for Scala. In: Proceedings of the data-flow execution models for extreme scale computing","DOI":"10.1109\/DFM.2012.12"},{"key":"1383_CR19","volume-title":"Programming in Scala: a comprehensive step-by-step guide","author":"M Odersky","year":"2008","unstructured":"Odersky M, Spoon L, Venners B (2008) Programming in Scala: a comprehensive step-by-step guide, 1st edn. Artima Incorporation, USA","edition":"1"},{"key":"1383_CR20","unstructured":"Roberts ES, Vandevoorde MT (1989) WORKCREWS : an abstraction for controlling parallelism, vol 42. http:\/\/opac.inria.fr\/record=b1047311"},{"key":"1383_CR21","doi-asserted-by":"crossref","unstructured":"Mohr E, Kranz DA, Halstead Jr RH (1990) Lazy task creation: a technique for increasing the granularity of parallel programs. In: Proceedings of the 1990 ACM conference on LISP and functional programming, LFP \u201990. ACM, New York. doi: 10.1145\/91556.91631","DOI":"10.1145\/91556.91631"},{"key":"1383_CR22","doi-asserted-by":"crossref","unstructured":"Hendler D, Shavit N (2002) Non-blocking steal-half work queues. In: Proceedings of the 21st annual symposium on principles of distributed computing, PODC \u201902. ACM, New York. doi: 10.1145\/571825.571876","DOI":"10.1145\/571825.571876"},{"key":"1383_CR23","doi-asserted-by":"crossref","unstructured":"Chase D, Lev Y (2005) Dynamic circular work-stealing deque. In: Proceedings of the 17th annual ACM symposium on parallelism in algorithms and architectures, SPAA \u201905. ACM, New York. doi: 10.1145\/1073970.1073974","DOI":"10.1145\/1073970.1073974"},{"key":"1383_CR24","doi-asserted-by":"crossref","unstructured":"Acar UA, Blelloch GE, Blumofe RD (2000) The data locality of work stealing. In: Proceedings of the 12th annual ACM symposium on parallel algorithms and architectures, SPAA \u201900. ACM, New York. doi: 10.1145\/341800.341801","DOI":"10.1145\/341800.341801"},{"key":"1383_CR25","doi-asserted-by":"crossref","unstructured":"Bloom BH (1970) Space\/time trade-offs in hash coding with allowable errors. In: Communication ACM, vol 13. ACM, New York. doi: 10.1145\/362686.362692","DOI":"10.1145\/362686.362692"},{"key":"1383_CR26","doi-asserted-by":"crossref","unstructured":"Kumar S, Hughes CJ, Nguyen A (2007) Carbon: architectural support for fine-grained parallelism on chip multiprocessors. In: Proceedings of the 34th annual international symposium on computer architecture, ISCA \u201907. ACM, New York. doi: 10.1145\/1250662.1250683","DOI":"10.1145\/1250662.1250683"},{"key":"1383_CR27","doi-asserted-by":"crossref","unstructured":"Binkert N, Beckmann B, Black G, Reinhardt SK, Saidi A, Basu A, Hestness J, Hower DR, Krishna T, Sardashti S, Sen R, Sewell K, Shoaib M, Vaish N, Hill MD, Wood DA (2011) The Gem5 simulator. In: SIGARCH computer architecture news, vol 39. ACM, New York. doi: 10.1145\/2024716.2024718","DOI":"10.1145\/2024716.2024718"},{"key":"1383_CR28","doi-asserted-by":"crossref","unstructured":"Horn B, Schunck B (1981) Determining optical flow. In: Artificial intelligence, vol 17. Elsevier, London","DOI":"10.1016\/0004-3702(81)90024-2"},{"key":"1383_CR29","unstructured":"Project Gutenberg (1971). http:\/\/www.gutenberg.org\/"},{"key":"1383_CR30","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4757-0450-1","volume-title":"Pattern recognition with fuzzy objective function algorithms","author":"JC Bezdek","year":"1981","unstructured":"Bezdek JC (1981) Pattern recognition with fuzzy objective function algorithms. Kluwer Academic Publishers, Norwell"},{"key":"1383_CR31","doi-asserted-by":"crossref","unstructured":"Lea D (2000) A Java fork\/join framework. In: Proceedings of the ACM 2000 conference on Java Grande","DOI":"10.1145\/337449.337465"},{"key":"1383_CR32","doi-asserted-by":"crossref","unstructured":"Halstead Jr RH (1984) Implementation of multiLISP: LISP on a multiprocessor. In: Proceedings of the 1984 ACM symposium on LISP and functional programming, LFP \u201984. ACM, New York. doi: 10.1145\/800055.802017","DOI":"10.1145\/800055.802017"},{"key":"1383_CR33","doi-asserted-by":"crossref","unstructured":"Kwok YK, Ahmad I (1999) Static scheduling algorithms for allocating directed task graphs to multiprocessors. In: ACM Computer Surveys, vol 31. ACM, New York. doi: 10.1145\/344588.344618","DOI":"10.1145\/344588.344618"},{"key":"1383_CR34","unstructured":"Su E, Tian X, Girkar M, Haab G, Shah S, Petersen P (2002) Compiler support of the workqueuing execution model for Intel SMP architectures. In: 4th European workshop on OpenMP"},{"key":"1383_CR35","doi-asserted-by":"crossref","unstructured":"Arora NS, Blumofe RD, Plaxton CG (1998) Thread scheduling for multiprogrammed multiprocessors. In: Proceedings of the 10th annual ACM symposium on parallel algorithms and architectures, SPAA \u201998. ACM, New York. doi: 10.1145\/277651.277678","DOI":"10.1145\/277651.277678"},{"key":"1383_CR36","doi-asserted-by":"crossref","unstructured":"Sanchez D, Yoo RM, Kozyrakis C (2010) Flexible architectural support for fine-grain scheduling. In: Proceedings of the 15th edition of ASPLOS on architectural support for programming languages and operating systems, ASPLOS XV. ACM, New York. doi: 10.1145\/1736020.1736055","DOI":"10.1145\/1736020.1736055"},{"key":"1383_CR37","volume-title":"Principles and practices of interconnection networks","author":"W Dally","year":"2003","unstructured":"Dally W, Towles B (2003) Principles and practices of interconnection networks. Morgan Kaufmann Publishers Inc., San Francisco"},{"key":"1383_CR38","doi-asserted-by":"crossref","unstructured":"Yoo RM, Hughes CJ, Kim C, Chen YK, Kozyrakis C (2013) Locality-aware task management for unstructured parallelism: a quantitative limit study. In: Proceedings of the 25th annual ACM symposium on parallelism in algorithms and architectures, SPAA \u201913. ACM, New York. doi: 10.1145\/2486159.2486175","DOI":"10.1145\/2486159.2486175"},{"key":"1383_CR39","doi-asserted-by":"crossref","unstructured":"Chen S, Gibbons PB, Kozuch M, Liaskovitis V, Ailamaki A, Blelloch GE, Falsafi B, Fix L, Hardavellas N, Mowry TC, Wilkerson C (2007) Scheduling threads for constructive cache sharing on CMPs. In: Proceedings of the 19th annual ACM symposium on parallel algorithms and architectures, SPAA \u201907. ACM, New York. doi: 10.1145\/1248377.1248396","DOI":"10.1145\/1248377.1248396"},{"key":"1383_CR40","doi-asserted-by":"crossref","unstructured":"Blelloch GE, Gibbons PB (2004) Effectively sharing a cache among threads. In: Proceedings of the 16th annual ACM symposium on parallelism in algorithms and architectures, SPAA \u201904. ACM, New York. doi: 10.1145\/1007912.1007948","DOI":"10.1145\/1007912.1007948"},{"key":"1383_CR41","doi-asserted-by":"crossref","unstructured":"Blelloch GE, Gibbons PB, Matias Y (1999) Provably efficient scheduling for languages with fine-grained parallelism. In: Journal of ACM, vol 46. ACM, New York. doi: 10.1145\/301970.301974","DOI":"10.1145\/301970.301974"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1383-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11227-015-1383-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-015-1383-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T10:40:38Z","timestamp":1559385638000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11227-015-1383-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,2,8]]},"references-count":41,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2015,6]]}},"alternative-id":["1383"],"URL":"https:\/\/doi.org\/10.1007\/s11227-015-1383-2","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,2,8]]}}}