{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:34:18Z","timestamp":1750221258063,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,6,20]],"date-time":"2018-06-20T00:00:00Z","timestamp":1529452800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100011030","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-AC02-06CH11357"],"award-info":[{"award-number":["DE-AC02-06CH11357"]}],"id":[{"id":"10.13039\/100011030","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,6,20]]},"DOI":"10.1145\/3241793.3241809","type":"proceedings-article","created":{"date-parts":[[2018,10,2]],"date-time":"2018-10-02T12:09:29Z","timestamp":1538482169000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["A Case Study of Integer Sum Reduction using Atomics"],"prefix":"10.1145","author":[{"given":"Zheming","family":"Jin","sequence":"first","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hal","family":"Finkel","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Lemont, IL"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2018,6,20]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2688500.2688501"},{"key":"e_1_3_2_1_2_1","unstructured":"Intel FPGA SDK for OpenCL Programming Guide. UG-OCL002. 2017.05.08  Intel FPGA SDK for OpenCL Programming Guide. UG-OCL002. 2017.05.08"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2015.2513673"},{"key":"e_1_3_2_1_4_1","volume-title":"eds","author":"Koch D.","year":"2016","unstructured":"Koch , D. , Hannig , F. and Ziener , D . eds ., 2016 . FPGAs for Software Programmers. Springer . Koch, D., Hannig, F. and Ziener, D. eds., 2016. FPGAs for Software Programmers. Springer."},{"key":"e_1_3_2_1_5_1","unstructured":"https:\/\/developer.amd.com\/resources\/articles-whitepapers\/opencl-optimization-case-study-simple-reductions\/  https:\/\/developer.amd.com\/resources\/articles-whitepapers\/opencl-optimization-case-study-simple-reductions\/"},{"volume-title":"Parallel & Distributed Processing (IPDPS), 2010 IEEE International Symposium on (pp. 1--12)","author":"Xiao S.","key":"e_1_3_2_1_6_1","unstructured":"Xiao , S. and Feng , W.C ., 2010, April. Inter-block GPU communication via fast barrier synchronization . In Parallel & Distributed Processing (IPDPS), 2010 IEEE International Symposium on (pp. 1--12) . IEEE. Xiao, S. and Feng, W.C., 2010, April. Inter-block GPU communication via fast barrier synchronization. In Parallel & Distributed Processing (IPDPS), 2010 IEEE International Symposium on (pp. 1--12). IEEE."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/564870.564881"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1854273.1854303"},{"key":"e_1_3_2_1_9_1","unstructured":"https:\/\/software.intel.com\/en-us\/articles\/using-opencl-20-atomics  https:\/\/software.intel.com\/en-us\/articles\/using-opencl-20-atomics"},{"key":"e_1_3_2_1_10_1","volume-title":"The OpenCL Specification, version 1.2","author":"Khronos OpenCL Working Group","year":"2011","unstructured":"Khronos OpenCL Working Group , The OpenCL Specification, version 1.2 , 15 November 2011 . Cited on pages, 18(7), p.30 Khronos OpenCL Working Group, The OpenCL Specification, version 1.2, 15 November 2011. Cited on pages, 18(7), p.30"},{"key":"e_1_3_2_1_11_1","unstructured":"Kirk D.B. and Wen-Mei W.H. 2016. Programming massively parallel processors: a hands-on approach. Morgan Kaufmann.   Kirk D.B. and Wen-Mei W.H. 2016. Programming massively parallel processors: a hands-on approach. Morgan Kaufmann."},{"key":"e_1_3_2_1_12_1","unstructured":"Mark H. 2008. Optimizing parallel reduction in CUDA. NVIDIA CUDA SDK.  Mark H. 2008. Optimizing parallel reduction in CUDA. NVIDIA CUDA SDK."},{"key":"e_1_3_2_1_13_1","unstructured":"https:\/\/developer.amd.com\/resources\/articles-whitepapers\/opencl-optimization-case-study-simple-reductions\/  https:\/\/developer.amd.com\/resources\/articles-whitepapers\/opencl-optimization-case-study-simple-reductions\/"},{"volume-title":"High Performance Computing and Simulation (HPCS), 2012 International Conference on (pp. 511--519)","author":"Mart\u00edn P.J.","key":"e_1_3_2_1_14_1","unstructured":"Mart\u00edn , P.J. , Ayuso , L.F. , Torres , R. and Gavilanes , A ., 2012, July. Algorithmic strategies for optimizing the parallel reduction primitive in CUDA . In High Performance Computing and Simulation (HPCS), 2012 International Conference on (pp. 511--519) . IEEE. Mart\u00edn, P.J., Ayuso, L.F., Torres, R. and Gavilanes, A., 2012, July. Algorithmic strategies for optimizing the parallel reduction primitive in CUDA. In High Performance Computing and Simulation (HPCS), 2012 International Conference on (pp. 511--519). IEEE."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2775049.2602993"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2011.34"},{"volume-title":"Parallel and Distributed Processing Symposium, 2005. Proceedings. 19th IEEE International (pp. 8-pp). IEEE.","author":"Zhuo L.","key":"e_1_3_2_1_17_1","unstructured":"Zhuo , L. , Morris , G.R. and Prasanna , V.K ., 2005, April. Designing scalable FPGA-based reduction circuits using pipelined floatingpoint cores . In Parallel and Distributed Processing Symposium, 2005. Proceedings. 19th IEEE International (pp. 8-pp). IEEE. Zhuo, L., Morris, G.R. and Prasanna, V.K., 2005, April. Designing scalable FPGA-based reduction circuits using pipelined floatingpoint cores. In Parallel and Distributed Processing Symposium, 2005. Proceedings. 19th IEEE International (pp. 8-pp). IEEE."},{"key":"e_1_3_2_1_18_1","unstructured":"http:\/\/svenssonjoel.github.io\/writing\/zynqreduce.pdf  http:\/\/svenssonjoel.github.io\/writing\/zynqreduce.pdf"},{"volume-title":"Field Programmable Logic and Applications (FPL), 2015 25th International Conference on (pp. 1--8). IEEE.","author":"Wang Z.","key":"e_1_3_2_1_19_1","unstructured":"Wang , Z. , He , B. and Zhang , W ., 2015, September. A study of data partitioning on OpenCL-based FPGAs . In Field Programmable Logic and Applications (FPL), 2015 25th International Conference on (pp. 1--8). IEEE. Wang, Z., He, B. and Zhang, W., 2015, September. A study of data partitioning on OpenCL-based FPGAs. In Field Programmable Logic and Applications (FPL), 2015 25th International Conference on (pp. 1--8). IEEE."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2847263.2847343"}],"event":{"name":"HEART 2018: The 9th International Symposium on Highly-Efficient Accelerators and Reconfigurable Technologies","acronym":"HEART 2018","location":"Toronto ON Canada"},"container-title":["Proceedings of the 9th International Symposium on Highly-Efficient Accelerators and Reconfigurable Technologies"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3241793.3241809","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3241793.3241809","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3241793.3241809","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:08:11Z","timestamp":1750212491000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3241793.3241809"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6,20]]},"references-count":20,"alternative-id":["10.1145\/3241793.3241809","10.1145\/3241793"],"URL":"https:\/\/doi.org\/10.1145\/3241793.3241809","relation":{},"subject":[],"published":{"date-parts":[[2018,6,20]]},"assertion":[{"value":"2018-06-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}