{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,3,29]],"date-time":"2022-03-29T14:18:53Z","timestamp":1648563533684},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2015,7,21]],"date-time":"2015-07-21T00:00:00Z","timestamp":1437436800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Parallel Prog"],"published-print":{"date-parts":[[2016,4]]},"DOI":"10.1007\/s10766-015-0373-6","type":"journal-article","created":{"date-parts":[[2015,7,20]],"date-time":"2015-07-20T12:49:10Z","timestamp":1437396550000},"page":"278-307","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["The Design and Implementation of TIDeFlow: A Dataflow-Inspired Execution Model for Parallel Loops and Task Pipelining"],"prefix":"10.1007","volume":"44","author":[{"given":"Daniel","family":"Orozco","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Elkin","family":"Garcia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robert","family":"Pavel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jaime","family":"Arteaga","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guang","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,7,21]]},"reference":[{"key":"373_CR1","doi-asserted-by":"crossref","first-page":"465","DOI":"10.1007\/BFb0014218","volume-title":"Languages and Compilers for Parallel Computing, Lecture Notes in Computer Science","author":"G Agrawal","year":"1996","unstructured":"Agrawal, G., Saltz, J.: Interprocedural data flow based optimizations for compilation of irregular problems. In: Huang, C.H., Sadayappan, P., Banerjee, U., Gelernter, D., Nicolau, A., Padua, D. (eds.) Languages and Compilers for Parallel Computing, Lecture Notes in Computer Science, vol. 1033, pp. 465\u2013479. Springer, Berlin (1996). doi: 10.1007\/BFb0014218"},{"key":"373_CR2","doi-asserted-by":"crossref","unstructured":"Arvind, Culler, D.E.: Dataflow Architectures, pp. 225\u2013253. Annual Reviews Inc., Palo Alto. http:\/\/portal.acm.org\/citation.cfm?id=17814.17824 (1986)","DOI":"10.1146\/annurev.cs.01.060186.001301"},{"key":"373_CR3","doi-asserted-by":"crossref","unstructured":"Blumofe, R., Leiserson, C.: Scheduling multithreaded computations by work stealing. In: Foundations of Computer Science, 1994 Proceedings, 35th Annual Symposium on, pp. 356 \u2013368 (1994). doi: 10.1109\/SFCS.1994.365680","DOI":"10.1109\/SFCS.1994.365680"},{"key":"373_CR4","volume-title":"Programming with POSIX Threads","author":"D Butenhof","year":"1997","unstructured":"Butenhof, D.: Programming with POSIX Threads. Addison-Wesley Professional, Boston (1997)"},{"key":"373_CR5","volume-title":"Using OpenMP: Portable Shared Memory Parallel Programming (Scientific and Engineering Computation)","author":"B Chapman","year":"2007","unstructured":"Chapman, B., Jost, G., van der Pas, R.: Using OpenMP: Portable Shared Memory Parallel Programming (Scientific and Engineering Computation). MIT Press, Cambridge (2007)"},{"key":"373_CR6","doi-asserted-by":"crossref","unstructured":"del Cuvillo, J., Zhu, W., Gao, G.: Landing openmp on cyclops-64: an efficient mapping of openmp to a many-core system-on-a-chip. In: CF \u201906: Proceedings of the 3rd Conference on Computing Frontiers, ACM, New York, NY, USA, pp. 41\u201350, (2006a). doi: 10.1145\/1128022.1128030","DOI":"10.1145\/1128022.1128030"},{"key":"373_CR7","doi-asserted-by":"crossref","unstructured":"del Cuvillo, J., Zhu, W., Hu, Z., Gao, G.R.: Toward a software infrastructure for the cyclops-64 cellular architecture. In: High-Performance Computing in an Advanced Collaborative Environment, p. 9 (2006b). doi: 10.1109\/HPCS.2006.48","DOI":"10.1109\/HPCS.2006.48"},{"key":"373_CR8","doi-asserted-by":"crossref","unstructured":"Del Cuvillo, J., Zhu, W., Hu, Z., Gao, G.: Tiny threads: a thread virtual machine for the cyclops64 cellular architecture. In: Parallel and Distributed Processing Symposium, 2005. Proceedings. 19th IEEE International, IEEE, p. 8 (2005)","DOI":"10.1109\/IPDPS.2005.434"},{"key":"373_CR9","doi-asserted-by":"crossref","unstructured":"Dennis, J.B.: First version of a data flow procedure language. In: Programming Symposium, Proceedings Colloque sur la Programmation. Springer, London, pp. 362\u2013376 (1974). http:\/\/portal.acm.org\/citation.cfm?id=647323.721501","DOI":"10.1007\/3-540-06859-7_145"},{"key":"373_CR10","doi-asserted-by":"crossref","unstructured":"Duran, A., Ayguad, E., Badia, R.M., Labarta, J., Martinell, L., Martorell, X., Planas, J.: Ompss: a proposal for programming heterogeneous multi-core architectures. Parallel Process. Lett. 21(02), pp. 173\u2013193 (2011). doi: 10.1142\/S0129626411000151","DOI":"10.1142\/S0129626411000151"},{"key":"373_CR11","unstructured":"Ebcioglu, K., Saraswat, V., Sarkar, V.: X10: programming for hierarchical parallelism and non-uniform data access. In: Proceedings of the International Workshop on Language Runtimes, OOPSLA (2004)"},{"key":"373_CR12","doi-asserted-by":"crossref","unstructured":"Ellson, J., Gansner, E., Koutsofios, L., North, S., Woodhull, G.: Graphviz\u2014open source graph drawing tools. In: Mutzel, P., J\u00fcnger, M., Leipert, S.(eds.) Graph Drawing. Lecture Notes in Computer Science, vol. 2265, pp. 483\u2013484. Springer, Berlin Heidelberg (2002). doi: 10.1007\/3-540-45848-4_57","DOI":"10.1007\/3-540-45848-4_57"},{"key":"373_CR13","unstructured":"Gao, G.R.: A pipelined code mapping scheme for static data flow computers. PhD thesis, Massachusetts Institute of Technology. http:\/\/hdl.handle.net\/1721.1\/37165 (1986)"},{"key":"373_CR14","doi-asserted-by":"crossref","unstructured":"Garcia, E., Orozco, D., Khan, R., Venetis, I., Livingston, K., Gao, G.: A dynamic schema to increase performance in many-core architectures through percolation operations. In: Proceedings of the 2013 IEEE International Conference on High Performance Computing (HiPC 2013), Bangalore. IEEE Computer Society (2013)","DOI":"10.1109\/HiPC.2013.6799134"},{"key":"373_CR15","doi-asserted-by":"crossref","unstructured":"Garcia, E., Venetis, I.E., Khan, R., Gao, G.: Optimized dense matrix multiplication on a many-core architecture. In: Proceedings of the Sixteenth International Conference on Parallel Computing (Euro-Par 2010), Part II, Springer, Ischia, Italy, Lecture Notes in Computer Science, vol. 6272, pp. 316\u2013327 (2010b)","DOI":"10.1007\/978-3-642-15291-7_29"},{"key":"373_CR16","doi-asserted-by":"crossref","unstructured":"Garcia, E., Venetis, I.E., Khan, R., Gao, G.R.: Optimized dense matrix multiplication on a many-core architecture. In: Euro-Par 2010-Parallel Processing, pp. 316\u2013327 (2010c)","DOI":"10.1007\/978-3-642-15291-7_29"},{"key":"373_CR17","doi-asserted-by":"crossref","unstructured":"Garcia, E., Orozco, D., Khan, R., Venetis, I.E., Livingston, K., Gao, G.R.: Dynamic percolation: a case of study on the shortcomings of traditional optimization in many-core architectures. In: ACM International Conference on Computing Frontiers 2012 (CF\u201912) (2012a)","DOI":"10.1145\/2212908.2212944"},{"key":"373_CR18","doi-asserted-by":"crossref","unstructured":"Garcia, E., Orozco, D., Pavel, R., Gao, G.: A discussion in favor of dynamic scheduling for regular applications in many-core architectures. In: Parallel and Distributed Processing Symposium Workshops and PhD Forum (IPDPSW), 2012 IEEE 26th International, IEEE, pp. 1591\u20131600 (2012b)","DOI":"10.1109\/IPDPSW.2012.200"},{"key":"373_CR19","doi-asserted-by":"crossref","unstructured":"Garcia, E., Orozco, D., Pavel, R., Gao, G.R.: A discussion in favor of Dynamic Scheduling for regular applications in Many-core Architectures. In: Proceedings of 2012 Workshop on Multithreaded Architectures and Applications (MTAAP 2012); 26th IEEE International Parallel & Distributed Processing Symposium (IPDPS 2012), pp. 1591\u20131600. ACM, Shanghai (2012)","DOI":"10.1109\/IPDPSW.2012.200"},{"key":"373_CR20","doi-asserted-by":"crossref","unstructured":"Gautier, T., Besseron, X., Pigeon, L.: Kaapi: A thread scheduling runtime system for data flow computations on cluster of multi-processors. In: Proceedings of the 2007 International Workshop on Parallel Symbolic Computation, PASCO \u201907, pp. 15\u201323. ACM, New York, NY, USA (2007)","DOI":"10.1145\/1278177.1278182"},{"key":"373_CR21","volume-title":"Using MPI-2: Advanced Features of the Message-Passing Interface","author":"W Gropp","year":"1999","unstructured":"Gropp, W., Lusk, E., Thakur, R.: Using MPI-2: Advanced Features of the Message-Passing Interface. MIT Press, Cambridge (1999)"},{"key":"373_CR22","doi-asserted-by":"crossref","unstructured":"Irigoin, F., Triolet, R.: Supernode partitioning. In: Proceedings of the 15th ACM SIGPLAN-SIGACT symposium on Principles of Programming Languages, pp. 319\u2013329. ACM (1988)","DOI":"10.1145\/73560.73588"},{"issue":"4","key":"373_CR23","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1109\/5.24143","volume":"77","author":"T Murata","year":"1989","unstructured":"Murata, T.: Petri nets: properties, analysis and applications. Proc. IEEE 77(4), 541\u2013580 (1989). doi: 10.1109\/5.24143","journal-title":"Proc. IEEE"},{"key":"373_CR24","doi-asserted-by":"crossref","first-page":"1907","DOI":"10.1016\/S0167-8191(99)00070-8","volume":"25","author":"WA Najjar","year":"1999","unstructured":"Najjar, W.A., Lee, E.A., Gao, G.R.: Advances in the dataflow computational model. Parallel Comput. 25, 1907\u20131929 (1999)","journal-title":"Parallel Comput."},{"key":"373_CR25","doi-asserted-by":"crossref","unstructured":"Nemawarkar, S., Gao, G.: Measurement and modeling of earth-manna multithreaded architecture. In: Modeling, Analysis, and Simulation of Computer and Telecommunication Systems. MASCOTS \u201996, Proceedings of the Fourth International Workshop on, pp. 109\u2013114 (1996). doi: 10.1109\/MASCOT.1996.501002","DOI":"10.1109\/MASCOT.1996.501002"},{"key":"373_CR26","doi-asserted-by":"crossref","unstructured":"Gulati, K., Khatri, S.P.: GPU architecture and the CUDA programming model. In: Hardware acceleration of EDA algorithms, pp. 23\u201330. Springer US (2010). doi: 10.1007\/978-1-4419-0944-2_3","DOI":"10.1007\/978-1-4419-0944-2_3"},{"key":"373_CR27","doi-asserted-by":"crossref","unstructured":"Orozco, D.: Tideflow: a parallel execution model for high performance computing programs. In: 2011 International Conference on Parallel Architectures and Compilation Techniques, p. 211 (2011)","DOI":"10.1109\/PACT.2011.44"},{"key":"373_CR28","doi-asserted-by":"crossref","unstructured":"Orozco, D., Gao, G.: Mapping the FDTD application to many-core chip architectures. In: Parallel Processing. ICPP \u201909. International Conference on, pp. 309\u2013316 (2009)","DOI":"10.1109\/ICPP.2009.44"},{"key":"373_CR29","doi-asserted-by":"crossref","unstructured":"Orozco, D., Xue, L., Bolat, M., Li, X., Gao, G.R.: Experience of optimizing FFT on intel architectures. In: Parallel and Distributed Processing Symposium. IPDPS 2007. IEEE International, IEEE, pp. 1\u20138 (2007)","DOI":"10.1109\/IPDPS.2007.370638"},{"key":"373_CR30","doi-asserted-by":"crossref","unstructured":"Orozco, D., Garcia, E., Gao, G.: Locality optimization of stencil applications using data dependency graphs. In: Proceedings of the 23rd International Conference on Languages and Compilers for Parallel Computing, LCPC\u201910, pp. 77\u201391. Springer, Berlin (2011a)","DOI":"10.1007\/978-3-642-19595-2_6"},{"key":"373_CR31","unstructured":"Orozco, D., Garcia, E., Khan, R., Livingston, K., Gao, G.: High throughput queue algorithms. Tech. rep., CAPSL Technical Memo 103 (2011b)"},{"key":"373_CR32","unstructured":"Orozco, D., Garcia, E., Pavel, R., Khan, R., Gao, G.R.: Polytasks: a compressed task representation for hpc runtimes. In: Proceedings of the 24th International Conference on Languages and Compilers for Parallel Computing, LCPC 11 (2011c)"},{"key":"373_CR33","unstructured":"Orozco, D., Garcia, E., Pavel, R., Khan, R., Gao, G.R.: Polytasks: a compressed task representation for hpc runtimes. CAPSL Technical Memo 105 (2011d)"},{"issue":"4","key":"373_CR34","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1145\/2086696.2086728","volume":"8","author":"D Orozco","year":"2012","unstructured":"Orozco, D., Garcia, E., Khan, R., Livingston, K., Gao, G.R.: Toward high-throughput algorithms on many-core architectures. ACM Trans. Archit. Code Optim. 8(4), 49 (2012)","journal-title":"ACM Trans. Archit. Code Optim."},{"key":"373_CR35","doi-asserted-by":"crossref","unstructured":"Sarkar, V., Hennessy, J.: Partitioning parallel programs for macro-dataflow. In: Proceedings of the 1986 ACM Conference on LISP and Functional Programming, LFP \u201986, pp. 202\u2013211. ACM, New York, NY, USA (1986). doi: 10.1145\/319838.319863","DOI":"10.1145\/319838.319863"},{"issue":"3","key":"373_CR36","doi-asserted-by":"crossref","first-page":"66","DOI":"10.1109\/MCSE.2010.69","volume":"12","author":"JE Stone","year":"2010","unstructured":"Stone, J.E., Gohara, D., Shi, G.: Opencl: a parallel programming standard for heterogeneous computing systems. Comput. Sci. Eng. 12(3), 66 (2010)","journal-title":"Comput. Sci. Eng."},{"key":"373_CR37","unstructured":"Theobald, K.: Earth: an efficient architecture for running threads. PhD thesis, University of Delaware (1999)"},{"key":"373_CR38","doi-asserted-by":"crossref","unstructured":"Yan, Y., Chatterjee, S., Orozco, D., Garcia, E., Budimlic, Z., Shirako, J., Pavel, R., Sarkar, V., Gao, G.: Hardware and software tradeoffs for task synchronization on manycore architectures. In: Proceedings of the Seventeenth International Conference on Parallel Computing (Euro-Par 2011), Bordeaux, France, Lecture Notes in Computer Science (2011)","DOI":"10.1007\/978-3-642-23397-5_12"},{"key":"373_CR39","doi-asserted-by":"crossref","unstructured":"Zuckerman, S., Suetterlein, J., Knauerhase, R.,Gao, G.: Using a codelet program execution model for exascale machines: position paper. In: Proceedings of the 1st International Workshop on Adaptive Self-Tuning Computing Systems for the Exaflop Era, pp. 64\u201369. ACM (2011)","DOI":"10.1145\/2000417.2000424"}],"container-title":["International Journal of Parallel Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-015-0373-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10766-015-0373-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10766-015-0373-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,28]],"date-time":"2019-08-28T12:48:52Z","timestamp":1566996532000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10766-015-0373-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,7,21]]},"references-count":39,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2016,4]]}},"alternative-id":["373"],"URL":"https:\/\/doi.org\/10.1007\/s10766-015-0373-6","relation":{},"ISSN":["0885-7458","1573-7640"],"issn-type":[{"value":"0885-7458","type":"print"},{"value":"1573-7640","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,7,21]]}}}