{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T09:19:17Z","timestamp":1773825557182,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1145\/3592980.3595307","type":"proceedings-article","created":{"date-parts":[[2023,6,14]],"date-time":"2023-06-14T03:07:44Z","timestamp":1686712064000},"page":"19-26","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Accelerating User-Defined Aggregate Functions (UDAF) with Block-wide Execution and JIT Compilation on GPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-7101-1068","authenticated-orcid":false,"given":"Bobbi","family":"Yogatama","sequence":"first","affiliation":[{"name":"University of Wisconsin-Madison, US"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9240-4872","authenticated-orcid":false,"given":"Brandon","family":"Miller","sequence":"additional","affiliation":[{"name":"NVIDIA, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5809-8141","authenticated-orcid":false,"given":"Yunsong","family":"Wang","sequence":"additional","affiliation":[{"name":"NVIDIA, US"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9005-1716","authenticated-orcid":false,"given":"Graham","family":"Markall","sequence":"additional","affiliation":[{"name":"NVIDIA, UK"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5777-3657","authenticated-orcid":false,"given":"Jacob","family":"Hemstad","sequence":"additional","affiliation":[{"name":"NVIDIA, US"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1075-1417","authenticated-orcid":false,"given":"Gregory","family":"Kimball","sequence":"additional","affiliation":[{"name":"NVIDIA, US"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0785-2519","authenticated-orcid":false,"given":"Xiangyao","family":"Yu","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison, US"}]}],"member":"320","published-online":{"date-parts":[[2023,6,18]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2022. A Lightweight LLVM Python Binding for Writing JIT Compilers. https:\/\/pypi.org\/project\/llvmlite\/."},{"key":"e_1_3_2_1_2_1","unstructured":"2022. BlazingSQL. https:\/\/blazingsql.com."},{"key":"e_1_3_2_1_3_1","unstructured":"2022. CUB Documentation. https:\/\/nvlabs.github.io\/cub\/."},{"key":"e_1_3_2_1_4_1","unstructured":"2022. CUDA C Programming Guide. http:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html."},{"key":"e_1_3_2_1_5_1","unstructured":"2022. cuDF- GPU DataFrame Library. https:\/\/github.com\/rapidsai\/cudf."},{"key":"e_1_3_2_1_6_1","unstructured":"2022. cuDF- Performance Comparison. https:\/\/github.com\/rapidsai\/cudf\/blob\/branch-23.04\/docs\/cudf\/source\/user_guide\/performance_comparisons.ipynb."},{"key":"e_1_3_2_1_7_1","unstructured":"2022. Extending Numba. https:\/\/numba.readthedocs.io\/en\/latest\/extending\/index.html."},{"key":"e_1_3_2_1_8_1","unstructured":"2022. Kinetica. https:\/\/kinetica.com\/."},{"key":"e_1_3_2_1_9_1","unstructured":"2022. Numba. https:\/\/numba.pydata.org\/."},{"key":"e_1_3_2_1_10_1","unstructured":"2022. NVComp. https:\/\/github.com\/NVIDIA\/nvcomp."},{"key":"e_1_3_2_1_11_1","unstructured":"2022. NVIDIA H100 Tensor Core GPU. https:\/\/www.nvidia.com\/en-us\/data-center\/h100\/."},{"key":"e_1_3_2_1_12_1","unstructured":"2022. OmniSci. https:\/\/omnisci.com."},{"key":"e_1_3_2_1_13_1","unstructured":"2022. Opencl. https:\/\/www.khronos.org\/opencl\/."},{"key":"e_1_3_2_1_14_1","unstructured":"2022. Pandarallel. https:\/\/nalepae.github.io\/pandarallel\/."},{"key":"e_1_3_2_1_15_1","unstructured":"2022. Pyjion - A drop-in JIT Compiler for Python 3.10. https:\/\/www.trypyjion.com\/."},{"key":"e_1_3_2_1_16_1","unstructured":"2022. User Defined Aggregate Functions (UDAFs). https:\/\/docs.oracle.com\/cd\/B10501_01\/appdev.920\/a96595\/dci11agg.htm."},{"key":"e_1_3_2_1_17_1","unstructured":"2022. User Defined Aggregate Functions (UDAFs). https:\/\/spark.apache.org\/docs\/latest\/sql-ref-functions-udf-aggregate.html."},{"key":"e_1_3_2_1_18_1","unstructured":"2022. User Defined Aggregates. https:\/\/www.postgresql.org\/docs\/current\/xaggr.html."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13222-014-0164-z"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-018-0512-y"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.14778\/3303753.3303760"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1142473.1142480"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3183734"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3183734"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/3380750.3380758"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Naga Govindaraju 2006. GPUTeraSort: high performance graphics co-processor sorting for large database management. In SIGMOD.","DOI":"10.1145\/1142473.1142511"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1620585.1620588"},{"key":"e_1_3_2_1_28_1","unstructured":"Bingsheng He Ke Yang Rui Fang Mian Lu Naga Govindaraju Qiong Luo and Pedro Sander. 2008. Relational joins on graphics processors. In SIGMOD."},{"key":"e_1_3_2_1_29_1","volume-title":"Revisiting co-processing for hash joins on the coupled cpu-gpu architecture. PVLDB","author":"He Jiong","year":"2013","unstructured":"Jiong He, Mian Lu, and Bingsheng He. 2013. Revisiting co-processing for hash joins on the coupled cpu-gpu architecture. PVLDB (2013)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.14778\/2735496.2735497"},{"key":"e_1_3_2_1_31_1","volume-title":"Hardware-oblivious parallelism for in-memory column-stores. PVLDB","author":"Heimel Max","year":"2013","unstructured":"Max Heimel, Michael Saecker, Holger Pirk, Stefan Manegold, and Volker Markl. 2013. Hardware-oblivious parallelism for in-memory column-stores. PVLDB (2013)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Tim Kaldewey Guy Lohman Rene Mueller and Peter Volk. 2012. GPU join processing revisited. In DaMoN.","DOI":"10.1145\/2236584.2236592"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.14778\/3067421.3067423"},{"key":"e_1_3_2_1_34_1","volume-title":"The State of RAPIDS AI. GPU Technical Conference","author":"Kraus Keith","year":"2021","unstructured":"Keith Kraus. 2021. The State of RAPIDS AI. GPU Technical Conference 2021."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.14778\/3007328.3007331"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389705"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517911"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2903735"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.14778\/3425879.3425890"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.14778\/3425879.3425890"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3457254"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.14778\/3436905.3436927"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3085504.3085521"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3526132"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 2020 International Conference on Management of Data. ACM.","author":"Shanbhag Anil","year":"2020","unstructured":"Anil Shanbhag, Xiangyao Yu, and Samuel Madden. 2020. A Study of the Fundamental Performance Charecteristics of GPUs and CPUs for Database Analytics. In Proceedings of the 2020 International Conference on Management of Data. ACM."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485278.2485282"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Elias Stehle and Hans-Arno Jacobsen. 2017. A Memory Bandwidth-Efficient Hybrid Radix Sort on GPUs. In SIGMOD. ACM.","DOI":"10.1145\/3035918.3064043"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2012.19"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2677451"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.14778\/3551793.3551809"},{"key":"e_1_3_2_1_52_1","volume-title":"The Yin and Yang of processing data warehousing queries on GPU devices. PVLDB","author":"Yuan Yuan","year":"2013","unstructured":"Yuan Yuan, Rubao Lee, and Xiaodong Zhang. 2013. The Yin and Yang of processing data warehousing queries on GPU devices. PVLDB (2013)."},{"key":"e_1_3_2_1_53_1","volume-title":"Hetero-DB: Next Generation High-Performance Database Systems by Best Utilizing Heterogeneous Computing and Storage Resources. Journal of Computer Science and Technology 30","author":"Zhang Kai","year":"2015","unstructured":"Kai Zhang, Feng Chen, Xiaoning Ding, Yin Huai, Rubao Lee, Tian Luo, Kaibo Wang, Yuan Yuan, and Xiaodong Zhang. 2015. Hetero-DB: Next Generation High-Performance Database Systems by Best Utilizing Heterogeneous Computing and Storage Resources. Journal of Computer Science and Technology 30 (2015)."}],"event":{"name":"SIGMOD\/PODS '23: International Conference on Management of Data","location":"Seattle WA USA","acronym":"SIGMOD\/PODS '23","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 19th International Workshop on Data Management on New Hardware"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3592980.3595307","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3592980.3595307","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:48:01Z","timestamp":1750178881000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3592980.3595307"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":52,"alternative-id":["10.1145\/3592980.3595307","10.1145\/3592980"],"URL":"https:\/\/doi.org\/10.1145\/3592980.3595307","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]},"assertion":[{"value":"2023-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}