{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T08:56:09Z","timestamp":1775638569236,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,9]],"date-time":"2024-06-09T00:00:00Z","timestamp":1717891200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,10]]},"DOI":"10.1145\/3662010.3663441","type":"proceedings-article","created":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T10:30:07Z","timestamp":1717065007000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Heterogeneous Intra-Pipeline Device-Parallel Aggregations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5137-8431","authenticated-orcid":false,"given":"Artem","family":"Kroviakov","sequence":"first","affiliation":[{"name":"TUM"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6774-7588","authenticated-orcid":false,"given":"Petr","family":"Kurapov","sequence":"additional","affiliation":[{"name":"Intel Deutschland GmbH"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4181-6535","authenticated-orcid":false,"given":"Christoph","family":"Anneser","sequence":"additional","affiliation":[{"name":"TUM"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1926-3551","authenticated-orcid":false,"given":"Jana","family":"Giceva","sequence":"additional","affiliation":[{"name":"TUM"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,6,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2024. Apache Arrow. https:\/\/arrow.apache.org\/ Accessed on 16.02.2024."},{"key":"e_1_3_2_1_2_1","unstructured":"2024. CUDA Driver API. https:\/\/docs.nvidia.com\/cuda\/cuda-driver-api\/ Accessed on 07.02.2024."},{"key":"e_1_3_2_1_3_1","unstructured":"2024. HDK. https:\/\/github.com\/intel-ai\/hdk Accessed on 16.02.2024."},{"key":"e_1_3_2_1_4_1","unstructured":"2024. Heavy.AI. https:\/\/www.heavy.ai\/ Accessed on 07.02.2024."},{"key":"e_1_3_2_1_5_1","unstructured":"2024. Intel Graphics Compiler. https:\/\/github.com\/intel\/intel-graphics-compiler Accessed on 08.02.2024."},{"key":"e_1_3_2_1_6_1","unstructured":"2024. NVIDIA CUDA Compiler. https:\/\/docs.nvidia.com\/cuda\/cuda-compiler-driver-nvcc\/index.html Accessed on 08.02.2024."},{"key":"e_1_3_2_1_7_1","unstructured":"2024. OneAPI Level Zero Specification. https:\/\/spec.oneapi.io\/level-zero\/latest\/index.html Accessed on 07.02.2024."},{"key":"e_1_3_2_1_8_1","unstructured":"2024. Pull request with a transfer scheme improvement proposal. https:\/\/github.com\/intel-ai\/hdk\/pull\/711 Accessed on 07.05.2024."},{"key":"e_1_3_2_1_9_1","unstructured":"2024. SYCL. https:\/\/www.khronos.org\/sycl\/ Accessed on 07.02.2024."},{"key":"e_1_3_2_1_10_1","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dan Mane Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Viegas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2016. TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems. arXiv:1603.04467 [cs.DC]"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","unstructured":"Hartwig Anzt Yuhsiang M. Tsai Ahmad Abdelfattah Terry Cojean and Jack Dongarra. 2020. Evaluating the Performance of NVIDIA's A100 Ampere GPU for Sparse and Batched Computations. In 2020 IEEE\/ACM Performance Modeling Benchmarking and Simulation of High Performance Computer Systems (PMBS). 26--38. https:\/\/doi.org\/10.1109\/PMBS51919.2020.00009","DOI":"10.1109\/PMBS51919.2020.00009"},{"key":"e_1_3_2_1_12_1","volume-title":"Joseph James Gebis, Parry Husbands, Kurt Keutzer, David A. Patterson, William Lester Plishker, John Shalf, Samuel Webb Williams, and Katherine A. Yelick.","author":"Asanovi\u0107 Krste","year":"2006","unstructured":"Krste Asanovi\u0107, Ras Bodik, Bryan Christopher Catanzaro, Joseph James Gebis, Parry Husbands, Kurt Keutzer, David A. Patterson, William Lester Plishker, John Shalf, Samuel Webb Williams, and Katherine A. Yelick. 2006. The Landscape of Parallel Computing Research: A View from Berkeley. Technical Report UCB\/EECS-2006-183. EECS Department, University of California, Berkeley. http:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2006\/EECS-2006-183.html"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2013.6544839"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536274.2536325"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2882936"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.14778\/2733004.2733042"},{"key":"e_1_3_2_1_17_1","volume-title":"Advances in Databases and Information Systems, Barbara Catania, Giovanna Guerrini, and Jaroslav Pokorn\u00fd (Eds.)","author":"Bre\u00df Sebastian","unstructured":"Sebastian Bre\u00df, Norbert Siegmund, Ladjel Bellatreche, and Gunter Saake. 2013. An Operator-Stream-Based Scheduling Engine for Effective GPU Coprocessing. In Advances in Databases and Information Systems, Barbara Catania, Giovanna Guerrini, and Jaroslav Pokorn\u00fd (Eds.). Springer Berlin Heidelberg, Berlin, Heidelberg, 288--301."},{"key":"e_1_3_2_1_18_1","volume-title":"A framework for cost based optimization of hybrid CPU\/GPU query plans in database systems. Control and Cybernetics 41 (01","author":"Bre\u00df S.","year":"2012","unstructured":"S. Bre\u00df, Igor Geist, E. Schallehn, M. Mory, and Gunter Saake. 2012. A framework for cost based optimization of hybrid CPU\/GPU query plans in database systems. Control and Cybernetics 41 (01 2012), 715--742."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-45761-0_1"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.14778\/3632093.3632107"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.14778\/3303753.3303760"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3399666.3399907"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.14778\/3352063.3352137"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3076113.3076119"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42614.2022.9731673"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/93597.98720"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2011.5762730"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1620585.1620588"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536206.2536216"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536360.2536370"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3108404"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.14778\/3067421.3067423"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2882906"},{"key":"e_1_3_2_1_35_1","volume-title":"Analytical Queries: A Comprehensive Survey. arXiv:2311.15730 [cs.DB]","author":"Kurapov Petr","year":"2023","unstructured":"Petr Kurapov and Areg Melik-Adamyan. 2023. Analytical Queries: A Comprehensive Survey. arXiv:2311.15730 [cs.DB]"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2588555.2610507"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2928289"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.14778\/3007328.3007331"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389705"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2788396"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2619228.2619230"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","unstructured":"Yasuhito Ogata Toshio Endo Naoya Maruyama and Satoshi Matsuoka. 2008. An efficient model-based CPU-GPU heterogeneous FFT library. (2008) 1--10. https:\/\/doi.org\/10.1109\/IPDPS.2008.4536163","DOI":"10.1109\/IPDPS.2008.4536163"},{"key":"e_1_3_2_1_43_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","year":"1912","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. arXiv:1912.01703 [cs.LG]"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2915224"},{"key":"e_1_3_2_1_45_1","volume-title":"9th Biennial Conference on Innovative Data Systems Research, CIDR","author":"Periklis Chrysogelos","year":"2019","unstructured":"Chrysogelos Periklis, Panagiotis Sioulas, and Anastasia Ailamaki. 2019. Hardware-conscious Query Processing in GPU-accelerated Analytical Engines. In 9th Biennial Conference on Innovative Data Systems Research, CIDR 2019, Asilomar, CA, USA, January 13-16, 2019, Online Proceedings. www.cidrdb.org. http:\/\/cidrdb.org\/cidr2019\/papers\/p127-chrysogelos-cidr19.pdf"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485126"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3380595"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1177\/10943420231188079"},{"key":"e_1_3_2_1_49_1","volume-title":"Self-Tuning Query Scheduling for Analytical Workloads. In SIGMOD Conference. ACM","author":"Wagner Benjamin","year":"2021","unstructured":"Benjamin Wagner, Andr\u00e9 Kohn, and Thomas Neumann. 2021. Self-Tuning Query Scheduling for Analytical Workloads. In SIGMOD Conference. ACM, 1879--1891."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.14778\/3551793.3551809"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2018.00097"},{"key":"e_1_3_2_1_53_1","volume-title":"FineStream: Fine-Grained Window-Based Stream Processing on CPU-GPU Integrated Architectures. In 2020 USENIX Annual Technical Conference (USENIX ATC 20)","author":"Zhang Feng","year":"2020","unstructured":"Feng Zhang, Lin Yang, Shuhao Zhang, Bingsheng He, Wei Lu, and Xiaoyong Du. 2020. FineStream: Fine-Grained Window-Based Stream Processing on CPU-GPU Integrated Architectures. In 2020 USENIX Annual Technical Conference (USENIX ATC 20). USENIX Association, 633--647. https:\/\/www.usenix.org\/conference\/atc20\/presentation\/zhang-feng"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10619-020-07304-z"}],"event":{"name":"SIGMOD\/PODS '24: International Conference on Management of Data","location":"Santiago AA Chile","acronym":"SIGMOD\/PODS '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 20th International Workshop on Data Management on New Hardware"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3662010.3663441","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3662010.3663441","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:05:01Z","timestamp":1755972301000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3662010.3663441"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,9]]},"references-count":54,"alternative-id":["10.1145\/3662010.3663441","10.1145\/3662010"],"URL":"https:\/\/doi.org\/10.1145\/3662010.3663441","relation":{},"subject":[],"published":{"date-parts":[[2024,6,9]]},"assertion":[{"value":"2024-06-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}