{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:47:05Z","timestamp":1750308425847,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,5,13]],"date-time":"2019-05-13T00:00:00Z","timestamp":1557705600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,5,13]]},"DOI":"10.1145\/3318170.3318178","type":"proceedings-article","created":{"date-parts":[[2019,7,1]],"date-time":"2019-07-01T19:23:35Z","timestamp":1562009015000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Exploring Integer Sum Reduction using Atomics on Intel CPU"],"prefix":"10.1145","author":[{"given":"Zheming","family":"Jin","sequence":"first","affiliation":[{"name":"Argonne National Laboratory, Argonne, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hal","family":"Finkel","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, Argonne, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,5,13]]},"reference":[{"volume-title":"Parallel & Distributed Processing (IPDPS), 2010 IEEE International Symposium on (pp. 1--12)","author":"Xiao S.","key":"e_1_3_2_1_1_1","unstructured":"Xiao, S. and Feng, W.C., 2010, April. Inter-block GPU communication via fast barrier synchronization. In Parallel & Distributed Processing (IPDPS), 2010 IEEE International Symposium on (pp. 1--12). IEEE"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/564870.564881"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1854273.1854303"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","unstructured":"Stone J.E. Gohara D. and Shi G. 2010. OpenCL: A parallel programming standard for heterogeneous computing systems. Computing in science & engineering 12(3) pp. 66--73.","DOI":"10.5555\/2220077.2220227"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","unstructured":"Munshi A. Gaster B. Mattson T.G. and Ginsburg D. 2011. OpenCL programming guide. Pearson Education.","DOI":"10.5555\/2049883"},{"key":"e_1_3_2_1_6_1","volume-title":"The OpenCL Specification, version 1.2","author":"Khronos OpenCL Working Group","year":"2011","unstructured":"Khronos OpenCL Working Group, The OpenCL Specification, version 1.2, 15 November 2011. Cited on pages, 18(7), p. 30"},{"key":"e_1_3_2_1_7_1","unstructured":"Work-Group Size Considerations OpenCL Developer Guide for Intel Core and Intel Xeon Processors. https:\/\/software.intel.com\/en-us\/node\/540512"},{"key":"e_1_3_2_1_8_1","unstructured":"Threading: Achieving Work-Group Level Parallelism OpenCL Developer Guide for Intel Core and Intel Xeon Processors. https:\/\/software.intel.com\/en-us\/iocl-tec-opg-threading-achieving-work-group-level-parallelism"},{"key":"e_1_3_2_1_9_1","unstructured":"Mark H. 2008. Optimizing parallel reduction in CUDA. NVIDIA CUDA SDK."},{"key":"e_1_3_2_1_10_1","unstructured":"https:\/\/developer.amd.com\/resources\/articles-whitepapers\/opencl-optimization-case-study-simple-reductions\/"},{"volume-title":"High Performance Computing and Simulation (HPCS), 2012 International Conference on (pp. 511--519)","author":"Mart\u00edn P.J.","key":"e_1_3_2_1_11_1","unstructured":"Mart\u00edn, P.J., Ayuso, L.F., Torres, R. and Gavilanes, A., 2012, July. Algorithmic strategies for optimizing the parallel reduction primitive in CUDA. In High Performance Computing and Simulation (HPCS), 2012 International Conference on (pp. 511--519). IEEE."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2775049.2602993"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2011.34"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3241793.3241809"},{"volume-title":"2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW) (pp. 532--539)","author":"Jin Z.","key":"e_1_3_2_1_15_1","unstructured":"Jin, Z. and Finkel, H., 2018, May. Optimizing an Atomics-Based Reduction Kernel on OpenCL FPGA Platform. In 2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW) (pp. 532--539). IEEE."},{"volume-title":"2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW) (pp. 27--35)","author":"Jin Z.","key":"e_1_3_2_1_16_1","unstructured":"Jin, Z. and Finkel, H., 2018, May. Optimizing Parallel Reduction on OpenCL FPGA Platform -- A Case Study of Frequent Pattern Compression. In 2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW) (pp. 27--35). IEEE."},{"volume-title":"Field Programmable Logic and Applications (FPL), 2015 25th International Conference on (pp. 1--8). IEEE.","author":"Wang Z.","key":"e_1_3_2_1_17_1","unstructured":"Wang, Z., He, B. and Zhang, W., 2015, September. A study of data partitioning on OpenCL-based FPGAs. In Field Programmable Logic and Applications (FPL), 2015 25th International Conference on (pp. 1--8). IEEE."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2847263.2847343"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3204919.3204921"}],"event":{"name":"IWOCL'19: International Workshop on OpenCL","sponsor":["Khronos Khronos Group","Northeastern University","Codeplay Codeplay Software Ltd.","Intel Intel","The University of Bristol The University of Bristol"],"location":"Boston MA USA","acronym":"IWOCL'19"},"container-title":["Proceedings of the International Workshop on OpenCL"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3318170.3318178","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3318170.3318178","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T17:49:34Z","timestamp":1750268974000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3318170.3318178"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5,13]]},"references-count":19,"alternative-id":["10.1145\/3318170.3318178","10.1145\/3318170"],"URL":"https:\/\/doi.org\/10.1145\/3318170.3318178","relation":{},"subject":[],"published":{"date-parts":[[2019,5,13]]},"assertion":[{"value":"2019-05-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}