{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T15:28:44Z","timestamp":1743089324855,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":20,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642552236"},{"type":"electronic","value":"9783642552243"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-642-55224-3_42","type":"book-chapter","created":{"date-parts":[[2014,5,5]],"date-time":"2014-05-05T20:44:02Z","timestamp":1399322642000},"page":"447-457","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Evaluation of Autoparallelization Toolkits for Commodity GPUs"],"prefix":"10.1007","author":[{"given":"David","family":"Williams","sequence":"first","affiliation":[]},{"given":"Valeriu","family":"Codreanu","sequence":"additional","affiliation":[]},{"given":"Po","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Baoquan","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Feng","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Burhan","family":"Yasar","sequence":"additional","affiliation":[]},{"given":"Babak","family":"Mahdian","sequence":"additional","affiliation":[]},{"given":"Alessandro","family":"Chiarini","sequence":"additional","affiliation":[]},{"given":"Xia","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Jos B. T. M.","family":"Roerdink","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,5,6]]},"reference":[{"issue":"1","key":"42_CR1","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1111\/j.1467-8659.2007.01012.x","volume":"26","author":"JD Owens","year":"2007","unstructured":"Owens, J.D., Luekbe, D., Govindaraju, N., Harris, M., Krger, J., Lefohn, A.E., Purcell, T.J.: A survey of general-purpose computation on graphics hardware. Comput. Graph. Forum 26(1), 80\u2013113 (2007)","journal-title":"Comput. Graph. Forum"},{"key":"42_CR2","unstructured":"Amini, M., Creusillet, B., Even, S., Keryell, R., Goubier, O., Guelton, S., McMahon, J.O., Pasquier, F.X., P\u00e9an, G., Villalon, P.: Par4All: from convex array regions to heterogeneous computing. In: 2nd International Workshop on Polyhedral Compilation Techniques, Paris, France, Jan 2012"},{"key":"42_CR3","doi-asserted-by":"crossref","unstructured":"Lee, S., Eigenmann, R.: OpenMPC: extended openMP programming and tuning for GPUs. In: Proceedings of the 2010 ACM\/IEEE Conference on Supercomputing, November 2010, pp. 1\u201311 (2010)","DOI":"10.1109\/SC.2010.36"},{"key":"42_CR4","first-page":"1756","volume-title":"Encyclopedia of Parallel Computing","author":"B Meister","year":"2011","unstructured":"Meister, B., Vasilache, N., Wohlford, D., Baskaran, M.M., Leung, A., Lethin, R.: R-stream compiler. In: Padua, D. (ed.) Encyclopedia of Parallel Computing, pp. 1756\u20131765. Springer, Heidelberg (2011)"},{"issue":"4","key":"42_CR5","doi-asserted-by":"publisher","first-page":"54:1","DOI":"10.1145\/2400682.2400713","volume":"9","author":"S Verdoolaege","year":"2013","unstructured":"Verdoolaege, S., Juega, J.C., Cohen, A., G\u00f3mez, J.I., Tenllado, C., Catthoor, F.: Polyhedral parallel code generation for CUDA. ACM Trans. Archit. Code Optim. 9(4), 54:1\u201354:23 (2013)","journal-title":"ACM Trans. Archit. Code Optim."},{"key":"42_CR6","doi-asserted-by":"crossref","unstructured":"Unat, D., Cai, X., Baden, S.B.: Mint: realizing CUDA performance in 3D Stencil methods with Annotated C. In: Proceedings of the International Conference on Supercomputing, pp. 214\u2013224 (2011)","DOI":"10.1145\/1995896.1995932"},{"key":"42_CR7","unstructured":"The OpenACC Application Programming Interface, Version 1.0 (2011)"},{"key":"42_CR8","unstructured":"OpenMP Application Program Interface, Version 3.1 (2011)"},{"key":"42_CR9","unstructured":"Dong, F.: A General Toolkit for \u201cGPUtilisation\u201d in SME Applications. http:\/\/www.gp-sme.eu\/ (2013). Accessed Oct 2013"},{"key":"42_CR10","doi-asserted-by":"crossref","unstructured":"Lee, S., Vetter, J.S.: Early evaluation of directive-based GPU programming models for productive exascale computing. In: Proceedings of the International Conference on High Performance Computing, Article 23 (2012)","DOI":"10.1109\/SC.2012.51"},{"key":"42_CR11","unstructured":"Pouchet, L-N.: PolyBench: The Polyhedral Benchmark suite (2011), Version 3.2. http:\/\/www.cs.ucla.edu\/~pouchet\/software\/polybench\/ (2011)"},{"key":"42_CR12","doi-asserted-by":"crossref","unstructured":"Grauer-Gray, S., Xu, L., Searles, R., Ayalasomayajula, S., Cavazos, J.: Auto-tuning a high-level language targeted to GPU codes. In: Proceedings of Innovative Parallel Computing, pp. 1\u201310 (2012)","DOI":"10.1109\/InPar.2012.6339595"},{"key":"42_CR13","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1016\/j.procs.2012.04.104","volume":"9","author":"J Zhou","year":"2012","unstructured":"Zhou, J., Unat, D., Choi, D.J., Guest, C.C., Cui, Y.: Hands-on performance tuning of 3D finite difference earthquake simulation on GPU fermi chipset. Procedia Comput. Sci. 9, 976\u2013985 (2012)","journal-title":"Procedia Comput. Sci."},{"key":"42_CR14","doi-asserted-by":"crossref","unstructured":"Fang, J., Varbanescu, A.L., Sips, H.: A comprehensive performance comparison of CUDA and OpenCL. In: Proceedings of the Parallel Processing, pp. 216\u2013225 (2011)","DOI":"10.1109\/ICPP.2011.45"},{"key":"42_CR15","unstructured":"Komatsu, K., Sato, K., Arai, Y., Koyama, K., Takizawa, H., Kobayashi, H.: Evaluating performance and portability of OpenCL programs. In: Proceedings of the Automatic Performance Tuning (2010)"},{"issue":"10","key":"42_CR16","doi-asserted-by":"publisher","first-page":"1370","DOI":"10.1016\/j.jpdc.2008.05.014","volume":"68","author":"S Che","year":"2008","unstructured":"Che, S., Boyer, M., Meng, J., Tarjan, D., Sheaffer, J.W., Skadron, K.: A performance study of general-purpose applications on graphics processors using cuda. J. Parallel Distrib. Comput. 68(10), 1370\u20131380 (2008)","journal-title":"J. Parallel Distrib. Comput."},{"key":"42_CR17","doi-asserted-by":"crossref","unstructured":"Magni, A., Grewe, D., Johnson, N.: Input-aware auto-tuning for directive-based GPU programming. In: Proceedings of the 6th Workshop on General Purpose Processor Using Graphic Processing Units, pp. 66\u201375 (2013)","DOI":"10.1145\/2458523.2458530"},{"key":"42_CR18","doi-asserted-by":"crossref","unstructured":"Reyes, R.N., Lopez, I., Fumero, J.J., de Sande, F.: Directive-based programming for GPUs: a comparative study. In: IEEE 9th International Conference on Embedded Software and Systems (HPCC-ICESS) (2012)","DOI":"10.1109\/HPCC.2012.62"},{"key":"42_CR19","series-title":"LNCS","first-page":"859","volume-title":"Euro-Par 2012","author":"S Wienke","year":"2012","unstructured":"Wienke, S., Springer, P., Terboven, C., an Mey, D.: OpenACC \u2014 First experiences with real-world applications. In: Kaklamanis, C., Papatheodorou, T., Spirakis, P.G. (eds.) Euro-Par 2012. LNCS, vol. 7484, pp. 859\u2013870. Springer, Heidelberg (2012)"},{"key":"42_CR20","doi-asserted-by":"crossref","unstructured":"Herdman, J.A., Gaudin, W.P., McIntosh-Smith, S., Boulton, M., Beckingsale, D.A., Mallinson, A.C., Jarvis, S.A.: Accelerating hydrocodes with OpenACC, OpeCL and CUDA. In: Proceedings of the High Performance Computing, Networking, Storage and Analysis (SCC), pp. 465\u2013471 (2012)","DOI":"10.1109\/SC.Companion.2012.66"}],"container-title":["Lecture Notes in Computer Science","Parallel Processing and Applied Mathematics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-55224-3_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,14]],"date-time":"2023-02-14T15:36:57Z","timestamp":1676389017000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-642-55224-3_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783642552236","9783642552243"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-55224-3_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"6 May 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}