{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T08:43:39Z","timestamp":1725871419939},"publisher-location":"Cham","reference-count":21,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319499550"},{"type":"electronic","value":"9783319499567"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-49956-7_4","type":"book-chapter","created":{"date-parts":[[2016,11,18]],"date-time":"2016-11-18T09:01:20Z","timestamp":1479459680000},"page":"43-61","source":"Crossref","is-referenced-by-count":5,"title":["Formalizing Data Locality in Task Parallel Applications"],"prefix":"10.1007","author":[{"given":"Germ\u00e1n","family":"Ceballos","sequence":"first","affiliation":[]},{"given":"Erik","family":"Hagersten","sequence":"additional","affiliation":[]},{"given":"David","family":"Black-Schaffer","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,11,19]]},"reference":[{"key":"4_CR1","unstructured":"The cache complexity of multithreaded cache oblivious algorithms. Theory of Computing Systems 45(2) (2009)"},{"issue":"3","key":"4_CR2","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1007\/s00224-002-1057-3","volume":"35","author":"U Acar","year":"2002","unstructured":"Acar, U., Blelloch, G., Blumofe, R.: The data locality of work stealing. Theory Comput. Syst. 35(3), 321\u2013347 (2002)","journal-title":"Theory Comput. Syst."},{"issue":"2","key":"4_CR3","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1002\/cpe.1631","volume":"23","author":"C Augonnet","year":"2011","unstructured":"Augonnet, C., Thibault, S., Namyst, R., Wacrenier, P.: StarPU: a unified platform for task scheduling on heterogeneous multicore architectures. Concurr. Comput. Pract. Exper. 23(2), 187\u2013198 (2011)","journal-title":"Concurr. Comput. Pract. Exper."},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Berg, E., Hagersten, E.: Statcache: a probabilistic approach to efficient and accurate data locality analysis. In: Proceedings of the 2004 IEEE International Symposium on Performance Analysis of Systems and Software (2004)","DOI":"10.1109\/ISPASS.2004.1291352"},{"issue":"1","key":"4_CR5","doi-asserted-by":"crossref","first-page":"169","DOI":"10.1145\/1071690.1064232","volume":"33","author":"E Berg","year":"2005","unstructured":"Berg, E., Hagersten, E.: Fast data-locality profiling of native execution. SIGMETRICS Perform. Eval. Rev. 33(1), 169\u2013180 (2005)","journal-title":"SIGMETRICS Perform. Eval. Rev."},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Berg, E., Zeffer, H., Hagersten, E.: A statistical multiprocessor cache model. In: IEEE International Symposium on Performance Analysis of Systems and Software, pp. 89\u201399, March 2006","DOI":"10.1109\/ISPASS.2006.1620793"},{"issue":"5","key":"4_CR7","doi-asserted-by":"crossref","first-page":"720","DOI":"10.1145\/324133.324234","volume":"46","author":"RD Blumofe","year":"1999","unstructured":"Blumofe, R.D., Leiserson, C.E.: Scheduling multithreaded computations by work stealing. J. ACM 46(5), 720\u2013748 (1999)","journal-title":"J. ACM"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Cao, Q., Zuo, M.: A scheduling strategy supporting OpenMP task on heterogeneous multicore. In: 26th IEEE International Parallel and Distributed Processing Symposium Workshops & PhD Forum, IPDPS 2012, Shanghai, China, 21\u201325 May 2012, pp. 2077\u20132084 (2012)","DOI":"10.1109\/IPDPSW.2012.244"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Chen, Q., Guo, M., Huang, Z.: Cats: cache aware task-stealing based on online profiling in multi-socket multi-core architectures. In: Proceedings of the 26th ACM International Conference on Supercomputing, ICS 2012, pp. 163\u2013172 (2012)","DOI":"10.1145\/2304576.2304599"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Ding, Y., Hu, K., Zhao, Z.: Performance monitoring and analysis of task-based OpenMP (2013)","DOI":"10.1371\/journal.pone.0077742"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Duran, A., Teruel, X., Ferrer, R., Martorell, X., Ayguade, E.: Barcelona OpenMP tasks suite: a set of benchmarks targeting the exploitation of task parallelism in OpenMP. In: International Conference on Parallel Processing, ICPP 2009, pp. 124\u2013131, September 2009","DOI":"10.1109\/ICPP.2009.64"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Eklov, D., Black-Schaffer, D., Hagersten, E.: StatCC: a statistical cache contention model. In: Proceedings of the 19th International Conference on Parallel Architectures and Compilation Techniques, PACT 2010, pp. 551\u2013552 (2010)","DOI":"10.1145\/1854273.1854347"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Ekl\u00f6v, D., Hagersten, E.: StatStack: efficient modeling of LRU caches. In: Proceeding International Symposium on Performance Analysis of Systems and Software: ISPASS 2010, pp. 55\u201365. IEEE (2010)","DOI":"10.1109\/ISPASS.2010.5452069"},{"key":"4_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1007\/978-3-642-40698-0_10","volume-title":"OpenMP in the Era of Low Power Devices and Accelerators","author":"P Ghosh","year":"2013","unstructured":"Ghosh, P., Yan, Y., Eachempati, D., Chapman, B.: A prototype implementation of OpenMP task dependency support. In: Rendell, A.P., Chapman, B.M., M\u00fcller, M.S. (eds.) IWOMP 2013. LNCS, vol. 8122, pp. 128\u2013140. Springer, Heidelberg (2013). doi: 10.1007\/978-3-642-40698-0_10"},{"key":"4_CR15","unstructured":"Jaleel, A., Cohn, R.S., keung Luk, C., Jacob, B.: Cmp$im: a pin-based on-the-fly multi-core cache simulator. In: The Fourth Annual Workshop on Modeling, Benchmarking and Simulation (MoBS), Co-located with ISCA 2008 (2008)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Lorenz, D., Philippen, P., Schmidl, D., Wolf, F.: Profiling of OpenMP tasks with Score-P. In: 41st International Conference on Parallel Processing Workshops, ICPPW 2012, Pittsburgh, PA, USA, 10\u201313 September 2012, pp. 444\u2013453 (2012)","DOI":"10.1109\/ICPPW.2012.62"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Luk, C.-K., Cohn, R., Muth, R., Patil, H., Klauser, A., Lowney, G., Wallace, S., Reddi, V.J., Hazelwood, K.: Pin: building customized program analysis tools with dynamic instrumentation. In: Proceedings of the 2005 ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2005, pp. 190\u2013200 (2005)","DOI":"10.1145\/1065010.1065034"},{"key":"4_CR18","unstructured":"OpenMP Architecture Review Board. OpenMP application program interface version 3.0 (2008)"},{"key":"4_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1007\/978-3-642-30961-8_15","volume-title":"OpenMP in a Heterogeneous World","author":"D Schmidl","year":"2012","unstructured":"Schmidl, D., Philippen, P., Lorenz, D., R\u00f6ssel, C., Geimer, M., Mey, D., Mohr, B., Wolf, F.: Performance analysis techniques for task-based OpenMP applications. In: Chapman, B.M., Massaioli, F., M\u00fcller, M.S., Rorro, M. (eds.) IWOMP 2012. LNCS, vol. 7312, pp. 196\u2013209. Springer, Heidelberg (2012). doi: 10.1007\/978-3-642-30961-8_15"},{"key":"4_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"440","DOI":"10.1007\/978-3-540-24688-6_58","volume-title":"Computational Science - ICCS 2004","author":"J Weidendorfer","year":"2004","unstructured":"Weidendorfer, J., Kowarschik, M., Trinitis, C.: A tool suite for simulation based analysis of memory access behavior. In: Bubak, M., Albada, G.D., Sloot, P.M.A., Dongarra, J. (eds.) ICCS 2004. LNCS, vol. 3038, pp. 440\u2013447. Springer, Heidelberg (2004). doi: 10.1007\/978-3-540-24688-6_58"},{"issue":"1\u20133","key":"4_CR21","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1504\/IJHPCN.2004.007564","volume":"1","author":"T Weng","year":"2004","unstructured":"Weng, T., Chapman, B.: Towards optimisation of openmp codes for synchronisation and data reuse. Int. J. High Perform. Comput. Netw. 1(1\u20133), 43\u201354 (2004)","journal-title":"Int. J. High Perform. Comput. Netw."}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-49956-7_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,15]],"date-time":"2019-09-15T18:57:35Z","timestamp":1568573855000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-49956-7_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319499550","9783319499567"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-49956-7_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]}}}