{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T13:44:10Z","timestamp":1725716650254},"publisher-location":"Berlin, Heidelberg","reference-count":12,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642369483"},{"type":"electronic","value":"9783642369490"}],"license":[{"start":{"date-parts":[[2013,1,1]],"date-time":"2013-01-01T00:00:00Z","timestamp":1356998400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-36949-0_15","type":"book-chapter","created":{"date-parts":[[2013,2,14]],"date-time":"2013-02-14T20:34:27Z","timestamp":1360874067000},"page":"123-132","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mastering Software Variant Explosion for GPU Accelerators"],"prefix":"10.1007","author":[{"given":"Richard","family":"Membarth","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Frank","family":"Hannig","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J\u00fcrgen","family":"Teich","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mario","family":"K\u00f6rner","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wieland","family":"Eckert","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"issue":"1","key":"15_CR1","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/S0167-8191(00)00087-9","volume":"27","author":"R. Clint Whaley","year":"2001","unstructured":"Clint Whaley, R., Petitet, A., Dongarra, J.: Automated Empirical Optimizations of Software and the ATLAS Project. Parallel Computing\u00a027(1), 3\u201335 (2001)","journal-title":"Parallel Computing"},{"unstructured":"Czarnecki, K., Eisenecker, U.: Generative Programming: Methods, Tools, and Applications. Addison-Wesley (2000)","key":"15_CR2"},{"issue":"8","key":"15_CR3","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1016\/j.parco.2011.10.002","volume":"38","author":"P. Du","year":"2011","unstructured":"Du, P., Weber, R., Luszczek, P., Tomov, S., Peterson, G., Dongarra, J.: From CUDA to OpenCL: Towards a Performance-portable Solution for Multi-platform GPU Programming. Parallel Computing\u00a038(8), 391\u2013407 (2011)","journal-title":"Parallel Computing"},{"doi-asserted-by":"crossref","unstructured":"Grewe, D., Wang, Z., O\u2019Boyle, M.F.: A Workload-Aware Mapping Approach for Data-Parallel Programs. In: Proceedings of the 6th International Conference on High-Performance and Embedded Architectures and Compilers, HiPEAC, pp. 117\u2013126. ACM (January 2011)","key":"15_CR4","DOI":"10.1145\/1944862.1944881"},{"key":"15_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"884","DOI":"10.1007\/978-3-642-01970-8_89","volume-title":"Computational Science \u2013 ICCS 2009","author":"Y. Li","year":"2009","unstructured":"Li, Y., Dongarra, J., Tomov, S.: A Note on Auto-tuning GEMM for GPUs. In: Allen, G., Nabrzyski, J., Seidel, E., van Albada, G.D., Dongarra, J., Sloot, P.M.A. (eds.) ICCS 2009, Part I. LNCS, vol.\u00a05544, pp. 884\u2013892. Springer, Heidelberg (2009)"},{"doi-asserted-by":"crossref","unstructured":"Membarth, R., Hannig, F., Teich, J., K\u00f6rner, M., Eckert, W.: Automatic Optimization of In-Flight Memory Transactions for GPU Accelerators based on a Domain-Specific Language for Medical Imaging. In: Proceedings of the 11th International Symposium on Parallel and Distributed Computing, ISPDC. IEEE (June 2012)","key":"15_CR6","DOI":"10.1109\/ISPDC.2012.36"},{"doi-asserted-by":"crossref","unstructured":"Membarth, R., Hannig, F., Teich, J., K\u00f6rner, M., Eckert, W.: Generating Device-specific GPU Code for Local Operators in Medical Imaging. In: Proceedings of the 26th IEEE International Parallel & Distributed Processing Symposium, IPDPS, pp. 569\u2013581. IEEE (May 2012)","key":"15_CR7","DOI":"10.1109\/IPDPS.2012.59"},{"doi-asserted-by":"crossref","unstructured":"Pohl, K., B\u00f6ckle, G., Van Der Linden, F.: Software Product Line Engineering: Foundations, Principles, and Techniques. Springer (2005)","key":"15_CR8","DOI":"10.1007\/3-540-28901-1"},{"issue":"10","key":"15_CR9","doi-asserted-by":"publisher","first-page":"1389","DOI":"10.1016\/j.jpdc.2008.05.011","volume":"68","author":"S. Ryoo","year":"2008","unstructured":"Ryoo, S., Rodrigues, C., Stone, S., Stratton, J., Ueng, S., Baghsorkhi, S., Hwu, W.: Program Optimization Carving for GPU Computing. Journal of Parallel and Distributed Computing\u00a068(10), 1389\u20131401 (2008)","journal-title":"Journal of Parallel and Distributed Computing"},{"key":"15_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"438","DOI":"10.1007\/978-3-642-23397-5_43","volume-title":"Euro-Par 2011 Parallel Processing","author":"P. Thoman","year":"2011","unstructured":"Thoman, P., Kofler, K., Studt, H., Thomson, J., Fahringer, T.: Automatic OpenCL Device Characterization: Guiding Optimized Kernel Design. In: Jeannot, E., Namyst, R., Roman, J. (eds.) Euro-Par 2011, Part II. LNCS, vol.\u00a06853, pp. 438\u2013452. Springer, Heidelberg (2011)"},{"doi-asserted-by":"crossref","unstructured":"Wong, H., Papadopoulou, M., Sadooghi-Alvandi, M., Moshovos, A.: Demystifying GPU Microarchitecture through Microbenchmarking. In: Proceedings of the 2010 IEEE International Symposium on Performance Analysis of Systems and Software, ISPASS, pp. 235\u2013246. IEEE (2010)","key":"15_CR11","DOI":"10.1109\/ISPASS.2010.5452013"},{"issue":"2","key":"15_CR12","first-page":"358","volume":"93","author":"K. Yotov","year":"2005","unstructured":"Yotov, K., Li, X., Ren, G., Garzaran, M., Padua, D., Pingali, K., Stodghill, P.: Is Search Really Necessary to Generate High-performance BLAS? Proceedings of the IEEE Special Issue on \u201cProgram Generation, Optimization, and Platform Adaptation\u201d\u00a093(2), 358\u2013386 (2005)","journal-title":"Proceedings of the IEEE Special Issue on \u201cProgram Generation, Optimization, and Platform Adaptation\u201d"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2012: Parallel Processing Workshops"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-36949-0_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,19]],"date-time":"2020-04-19T21:04:51Z","timestamp":1587330291000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-36949-0_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642369483","9783642369490"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-36949-0_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2013]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}