{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T07:59:14Z","timestamp":1743062354027,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030352240"},{"type":"electronic","value":"9783030352257"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-35225-7_9","type":"book-chapter","created":{"date-parts":[[2019,11,19]],"date-time":"2019-11-19T08:04:56Z","timestamp":1574150696000},"page":"109-126","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Memory-Access-Pattern Analysis Techniques for OpenCL Kernels"],"prefix":"10.1007","author":[{"given":"Gangwon","family":"Jo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jaehoon","family":"Jung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiyoung","family":"Park","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jaejin","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,11,15]]},"reference":[{"key":"9_CR1","unstructured":"AMD: AMD APP SDK OpenCL optimization guide (2015). \nhttp:\/\/amd-dev.wpengine.netdna-cdn.com\/wordpress\/media\/2013\/12\/AMD_OpenCL_Programming_Optimization_Guide2.pdf"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Ballance, R.A., Maccabe, A.B., Ottenstein, K.J.: The program dependence web: a representation supporting control-, data-, and demand-driven interpretation of imperative languages. In: Proceedings of the ACM SIGPLAN 1990 Conference on Programming Language Design and Implementation, pp. 257\u2013271 (1990)","DOI":"10.1145\/93548.93578"},{"key":"9_CR3","unstructured":"Bauer, M., Cook, H., Khailany, B.: CudaDMA. \nhttp:\/\/lightsighter.github.io\/CudaDMA\/"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Bauer, M., Cook, H., Khailany, B.: CudaDMA: optimizing GPU memory bandwidth via warp specialization. In: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis (2011)","DOI":"10.1145\/2063384.2063400"},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Bondhugula, U., Hartono, A., Ramanujam, J., Sadayappan, P.: A practical automatic polyhedral parallelizer and locality optimizer. In: Proceedings of the 29th ACM SIGPLAN Conference on Programming Language Design and Implementation, pp. 101\u2013113 (2008)","DOI":"10.1145\/1379022.1375595"},{"issue":"4","key":"9_CR6","doi-asserted-by":"publisher","first-page":"898","DOI":"10.1016\/j.cpc.2010.12.021","volume":"182","author":"WM Brown","year":"2011","unstructured":"Brown, W.M., Wang, P., Plimpton, S.J., Tharrington, A.N.: Implementing molecular dynamics on hybrid high performance computers - short range forces. Comput. Phys. Commun. 182(4), 898\u2013911 (2011)","journal-title":"Comput. Phys. Commun."},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Che, S., et al.: Rodinia: a benchmark suite for heterogeneous computing. In: Proceedings of 2009 IEEE International Symposium on Workload Characterization, pp. 44\u201354 (2009)","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Che, S., Sheaffer, J.W., Skadron, K.: Dymaxion: optimizing memory access patterns for heterogeneous systems. In: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis (2011)","DOI":"10.1145\/2063384.2063401"},{"issue":"4","key":"9_CR9","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1145\/115372.115320","volume":"13","author":"R Cytron","year":"1991","unstructured":"Cytron, R., Ferrante, J., Rosen, B.K., Wegman, M.N., Zadeck, F.K.: Efficiently computing static single assignment form and the control dependence graph. ACM Trans. Program. Lang. Syst. 13(4), 451\u2013490 (1991)","journal-title":"ACM Trans. Program. Lang. Syst."},{"issue":"8","key":"9_CR10","doi-asserted-by":"crossref","first-page":"1073","DOI":"10.1016\/j.media.2013.05.008","volume":"17","author":"A Eklund","year":"2013","unstructured":"Eklund, A., Dufort, P., Forsberg, D., LaConte, S.M.: Medical image processing on the GPU - past, present and future. Med. Image Anal. 17(8), 1073\u20131094 (2013)","journal-title":"Med. Image Anal."},{"issue":"5","key":"9_CR11","doi-asserted-by":"publisher","first-page":"1542","DOI":"10.1021\/ct200909j","volume":"8","author":"AW G\u00f6tz","year":"2012","unstructured":"G\u00f6tz, A.W., Williamson, M.J., Xu, D., Poole, D., Le Grand, S., Walker, R.C.: Routine microsecond molecular dynamics simulations with AMBER on GPUs. 1. generalized born. J. Chem. Theory Comput. 8(5), 1542\u20131555 (2012)","journal-title":"J. Chem. Theory Comput."},{"issue":"4","key":"9_CR12","doi-asserted-by":"publisher","first-page":"1250010","DOI":"10.1142\/S0129626412500107","volume":"22","author":"T Grosser","year":"2012","unstructured":"Grosser, T., Groesslinger, A., Lengauer, C.: Polly - performing polyhedral optimizations on a low-level intermediate representation. Parallel Process. Lett. 22(4), 1250010 (2012)","journal-title":"Parallel Process. Lett."},{"key":"9_CR13","doi-asserted-by":"publisher","first-page":"477","DOI":"10.1145\/233561.233568","volume":"18","author":"MR Haghighat","year":"1996","unstructured":"Haghighat, M.R., Polychronopoulos, C.D.: Symbolic analysis for parallelizing compilers. ACM Trans. Program. Lang. Syst. 18, 477\u2013518 (1996)","journal-title":"ACM Trans. Program. Lang. Syst."},{"issue":"1","key":"9_CR14","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1109\/TPDS.2010.107","volume":"22","author":"B Jang","year":"2011","unstructured":"Jang, B., Schaa, D., Mistry, P., Kaeli, D.: Exploiting memory access patterns to improve memory performance in data parallel architectures. IEEE Trans. Parallel Distrib. Syst. 22(1), 105\u2013118 (2011)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"9_CR15","unstructured":"Khronos Group: SPIR generator\/Clang. \nhttps:\/\/github.com\/KhronosGroup\/SPIR"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Kim, J., Kim, H., Lee, J.H., Lee, J.: Achieving a single compute device image in OpenCL for multiple GPUs. In: Proceedings of the 16th ACM Symposium on Principles and Practice of Parallel Programming, pp. 277\u2013288 (2011)","DOI":"10.1145\/1941553.1941591"},{"key":"9_CR17","unstructured":"NVIDIA: cuDNN. \nhttps:\/\/developer.nvidia.com\/cudnn"},{"key":"9_CR18","unstructured":"NVIDIA: CUDA C best practices guide (2015). \nhttp:\/\/docs.nvidia.com\/cuda\/cuda-c-best-practices-guide\/"},{"key":"9_CR19","unstructured":"Pop, S., Cohen, A., Bastoul, C., Girbal, S., Silber, G.A., Vasilache, N.: GRAPHITE: polyhedral analyses and optimizations for GCC. In: Proceedings of the 2006 GCC Developers Summit (2006)"},{"issue":"1","key":"9_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1471-2105-8-474","volume":"8","author":"MC Schatz","year":"2007","unstructured":"Schatz, M.C., Trapnell, C., Delcher, A.L., Varshney, A.: High-throughput sequence alignment using graphics processing units. BMC Bioinform. 8(1), 1\u201310 (2007)","journal-title":"BMC Bioinform."},{"key":"9_CR21","unstructured":"Seo, S., Lee, J., Jo, G., Lee, J.: Automatic OpenCL work-group size selection for multicore CPUs. In: Proceedings of the 22nd International Conference on Parallel Architectures and Compilation Techniques, pp. 387\u2013397 (2013)"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Steensgaard, B.: Points-to analysis in almost linear time. In: Proceedings of the 23rd ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, pp. 32\u201341 (1996)","DOI":"10.1145\/237721.237727"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Stratton, J.A., et al.: Optimization and architecture effects on GPU computing workload performance. In: Proceedings of Innovative Parallel Computing (InPar) (2012)","DOI":"10.1109\/InPar.2012.6339605"},{"key":"9_CR24","unstructured":"Stratton, J.A., et al.: Parboil: a revised benchmark suite for scientific and commercial throughput computing. Technical report, IMPACT-12-01, IMPACT, University of Illinois at Urbana-Champaign (2012)"},{"key":"9_CR25","unstructured":"Tal, B.N., Levy, E., Barak, A., Rubin, E.: Memory access patterns: the missing piece of the multi-GPU puzzle. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (2015)"},{"issue":"5\u20136","key":"9_CR26","doi-asserted-by":"publisher","first-page":"232","DOI":"10.1016\/j.parco.2009.12.005","volume":"36","author":"S Tomov","year":"2010","unstructured":"Tomov, S., Dongarra, J., Baboulin, M.: Towards dense linear algebra for hybrid GPU accelerated manycore systems. Parallel Comput. 36(5\u20136), 232\u2013240 (2010)","journal-title":"Parallel Comput."},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Tu, P., Padua, D.: Gated SSA-based demand-driven symbolic analysis for parallelizing compilers. In: Proceedings of the 9th International Conference on Supercomputing, pp. 414\u2013423 (1995)","DOI":"10.1145\/224538.224648"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-35225-7_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,11,19]],"date-time":"2019-11-19T18:04:14Z","timestamp":1574186654000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-35225-7_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030352240","9783030352257"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-35225-7_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"15 November 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"LCPC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Languages and Compilers for Parallel Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"College Station, TX","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 October 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"lcpc2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/parasol.tamu.edu\/lcpc2017\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Hotcrp.com","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"13","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}