{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T09:19:04Z","timestamp":1773825544886,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,22]]},"DOI":"10.1145\/3736227.3736232","type":"proceedings-article","created":{"date-parts":[[2025,7,10]],"date-time":"2025-07-10T08:03:02Z","timestamp":1752134582000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Path to GPU-Initiated I\/O for Data-Intensive Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-8346-2260","authenticated-orcid":false,"given":"Karl B.","family":"Torp","sequence":"first","affiliation":[{"name":"Samsung, Copenhagen, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1942-3818","authenticated-orcid":false,"given":"Simon A. F.","family":"Lund","sequence":"additional","affiliation":[{"name":"Samsung, Copenhagen, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6838-4854","authenticated-orcid":false,"given":"P\u0131nar","family":"T\u00f6z\u00fcn","sequence":"additional","affiliation":[{"name":"IT University of Copenhagen, Copenhagen, Denmark"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,10]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3662010.3663450"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620678.3624666"},{"key":"e_1_3_3_2_4_2","unstructured":"Tom Augspurger. 2025. High-Performance Remote IO With NVIDIA KvikIO. https:\/\/developer.nvidia.com\/blog\/high-performance-remote-io-with-nvidia-kvikio\/ Last accessed: 19\/05\/2025."},{"key":"e_1_3_3_2_5_2","unstructured":"BaM. [n. d.]. BaM GitHub Repository. https:\/\/github.com\/ZaidQureshi\/bam Last accessed: 25\/11\/2024."},{"key":"e_1_3_3_2_6_2","volume-title":"JAX: composable transformations of Python+NumPy programs","author":"Bradbury James","year":"2018","unstructured":"James Bradbury, Roy Frostig, Peter Hawkins, Matthew\u00a0James Johnson, Chris Leary, Dougal Maclaurin, George Necula, Adam Paszke, Jake VanderPlas, Skye Wanderman-Milne, and Qiao Zhang. 2018. JAX: composable transformations of Python+NumPy programs. http:\/\/github.com\/jax-ml\/jax"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","unstructured":"Jiashen Cao Rathijit Sen Matteo Interlandi Joy Arulraj and Hyesoon Kim. 2023. GPU Database Systems Characterization and Optimization. Proc. VLDB Endow. 17 3 (Nov. 2023) 441\u2013454. 10.14778\/3632093.3632107","DOI":"10.14778\/3632093.3632107"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651353"},{"key":"e_1_3_3_2_9_2","unstructured":"Jonathan Corbet. [n. d.]. Making life (even) harder for proprietary modules. https:\/\/lwn.net\/Articles\/939842\/ Last accessed: 25\/11\/2024."},{"key":"e_1_3_3_2_10_2","unstructured":"Jonathon Evans Ian Finder Ivan Goldwasser John Linford Vishal Mehta Daniel Ruiz and Mathias Wagner. 2023. NVIDIA Grace CPU Superchip Architecture In Depth. https:\/\/developer.nvidia.com\/blog\/nvidia-grace-cpu-superchip-architecture-in-depth\/"},{"key":"e_1_3_3_2_11_2","unstructured":"FIO. [n. d.]. Flexible I\/O Tester. https:\/\/fio.readthedocs.io\/en\/latest\/fio_doc.html Last accessed: 05\/12\/2024."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3183734"},{"key":"e_1_3_3_2_13_2","unstructured":"GMT. [n. d.]. GMT GitHub Repository. https:\/\/github.com\/lineagech\/GMT Last accessed: 10\/12\/2024."},{"key":"e_1_3_3_2_14_2","unstructured":"GPUfs. [n. d.]. GPUfs GitHub Repository. https:\/\/github.com\/gpufs\/gpufs Last accessed: 05\/12\/2024."},{"key":"e_1_3_3_2_15_2","volume-title":"9th Conference on Innovative Data Systems Research, CIDR 2020, Amsterdam, The Netherlands, January 12-15, 2020","author":"Haas Gabriel","year":"2020","unstructured":"Gabriel Haas, Michael Haubenschild, and Viktor Leis. 2020. Exploiting Directly-Attached NVMe Arrays in DBMS. In 9th Conference on Innovative Data Systems Research, CIDR 2020, Amsterdam, The Netherlands, January 12-15, 2020. www.cidrdb.org."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","unstructured":"Gabriel Haas and Viktor Leis. 2023. What Modern NVMe Storage Can Do and How to Exploit it: High-Performance I\/O for High-Performance Storage Engines. Proc. VLDB Endow. 16 9 (5 2023) 2090\u20132102. 10.14778\/3598581.3598584","DOI":"10.14778\/3598581.3598584"},{"key":"e_1_3_3_2_17_2","volume-title":"CHEOPS","author":"Hoozemans Joost","year":"2025","unstructured":"Joost Hoozemans, Robin Vonk, Johan Peltenburg, Felipe Aramburu, and Zaid Al-Ars. 2025. Using GPU Direct Storage with High-Performance Distributed Filesystems. In CHEOPS (Rotterdam, Netherlands)."},{"key":"e_1_3_3_2_18_2","unstructured":"Intel. [n. d.]. Optane SSD DC P5800X Series. https:\/\/www.intel.com\/content\/www\/us\/en\/products\/sku\/201859\/intel-optane-ssd-dc-p5800x-series-1-6tb-2-5in-pcie-x4-3d-xpoint\/specifications.html Last accessed: 10\/12\/2024."},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/NAS55553.2022.9925516"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3624062.3624168"},{"key":"e_1_3_3_2_21_2","series-title":"(FAST \u201924)","volume-title":"Proceedings of the 22nd USENIX Conference on File and Storage Technologies","author":"Joshi Kanchan","year":"2024","unstructured":"Kanchan Joshi, Anuj Gupta, Javier Gonz\u00e1lez, Ankit Kumar, Krishna\u00a0Kanth Reddy, Arun George, Simon Lund, and Jens Axboe. 2024. I\/O Passthru: upstreaming a flexible and efficient I\/O path in Linux. In Proceedings of the 22nd USENIX Conference on File and Storage Technologies (Santa Clara, CA, USA) (FAST \u201924). USENIX Association, USA, Article 7, 16\u00a0pages."},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3662010.3663441"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","unstructured":"Maximilian Kuschewski Jana Giceva Thomas Neumann and Viktor Leis. 2024. High-Performance Query Processing with NVMe Arrays: Spilling without Killing Performance. Proc. ACM Manag. Data 2 6 Article 238 (Dec. 2024) 27\u00a0pages. 10.1145\/3698813","DOI":"10.1145\/3698813"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Alberto Lerner and Gustavo Alonso. 2024. CXL and the Return of Scale-Up Database Engines. Proc. VLDB Endow. 17 10 (2024) 2568\u20132575. https:\/\/www.vldb.org\/pvldb\/vol17\/p2568-lerner.pdf","DOI":"10.14778\/3675034.3675047"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3578835"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3534056.3534936"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517842"},{"key":"e_1_3_3_2_28_2","unstructured":"Jonas Markussen. [n. d.]. libnvm: An API for building userspace NVMe drivers and storage applications. https:\/\/github.com\/enfiskutensykkel\/ssd-gpu-dma Last accessed: 25\/11\/2024."},{"key":"e_1_3_3_2_29_2","unstructured":"Nestor Maslej Loredana Fattorini Raymond Perrault Vanessa Parli Anka Reuel Erik Brynjolfsson John Etchemendy Katrina Ligett Terah Lyons James Manyika Juan\u00a0Carlos Niebles Yoav Shoham Russell Wald and Jack Clark. 2024. Artificial Intelligence Index Report 2024. arxiv:https:\/\/arXiv.org\/abs\/2405.19522\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2405.19522"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","unstructured":"Jayashree Mohan Amar Phanishayee Ashish Raniwala and Vijay Chidambaram. 2021. Analyzing and Mitigating Data Stalls in DNN Training. Proc. VLDB Endow. 14 5 (1 2021) 771\u2013784. 10.14778\/3446095.3446100","DOI":"10.14778\/3446095.3446100"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","unstructured":"Derek\u00a0G. Murray Ji\u0159\u00ed \u0160im\u0161a Ana Klimovic and Ihor Indyk. 2021. tf.Data: A Machine Learning Data Processing Framework. Proc. VLDB Endow. 14 12 (jul 2021) 2945\u20132958. 10.14778\/3476311.3476374","DOI":"10.14778\/3476311.3476374"},{"key":"e_1_3_3_2_32_2","series-title":"(CIDR \u201923)","volume-title":"Proceedings of the 13th Annual Conference on Innovative Data Systems Research","author":"Nicholson Hamish","year":"2023","unstructured":"Hamish Nicholson, Aunn Raza, Periklis Chrysogelos, and Anastasia Ailamaki. 2023. HetCache: Synergising NVMe Storage and GPU acceleration for Memory-Efficient Analytics. In Proceedings of the 13th Annual Conference on Innovative Data Systems Research(CIDR \u201923)."},{"key":"e_1_3_3_2_33_2","unstructured":"NVIDIA. [n. d.]. Rapids. https:\/\/developer.nvidia.com\/rapids Last accessed: 10\/12\/2024."},{"key":"e_1_3_3_2_34_2","unstructured":"NVIDIA. [n. d.]. NVIDIA A100 Tensor Core GPU Architecture. https:\/\/images.nvidia.com\/aem-dam\/en-zz\/Solutions\/data-center\/nvidia-ampere-architecture-whitepaper.pdf Last accessed: 26\/11\/2024."},{"key":"e_1_3_3_2_35_2","unstructured":"NVIDIA. [n. d.]. NVIDIA GPUDirect Storage Installation and Troubleshooting Guide - MLNX_OFED Requirements and Installation. https:\/\/docs.nvidia.com\/gpudirect-storage\/troubleshooting-guide\/index.html#mofed-req-install Last accessed: 25\/11\/2024."},{"key":"e_1_3_3_2_36_2","unstructured":"NVIDIA. [n. d.]. NVIDIA TESLA V100 GPU ARCHITECTURE. https:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf Last accessed: 26\/11\/2024."},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","unstructured":"Jeongmin\u00a0Brian Park Vikram\u00a0Sharma Mailthody Zaid Qureshi and Wen-mei Hwu. 2024. Accelerating Sampling and Aggregation Operations in GNN Frameworks with GPU Initiated Direct Storage Accesses. Proc. VLDB Endow. 17 6 (Feb. 2024) 1227\u20131240. 10.14778\/3648160.3648166","DOI":"10.14778\/3648160.3648166"},{"key":"e_1_3_3_2_38_2","first-page":"8024","volume-title":"Advances in Neural Information Processing Systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32. Curran Associates, Inc., 8024\u20138035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3575693.3575748"},{"key":"e_1_3_3_2_40_2","unstructured":"Samsung. [n. d.]. 970 EVO Plus PCIe 3.0 SSD. https:\/\/semiconductor.samsung.com\/consumer-storage\/internal-ssd\/970evoplus\/ Last accessed: 11\/12\/2024."},{"key":"e_1_3_3_2_41_2","unstructured":"Samsung. [n. d.]. 980 PRO w\/ Heatsink PCIe 4.0 NVMe SSD 1TB. https:\/\/www.samsung.com\/us\/computing\/memory-storage\/solid-state-drives\/980-pro-w-heatsink-pcie-4-0-nvme-ssd-1tb-mz-v8p1t0cw\/ Last accessed: 21\/11\/2024."},{"key":"e_1_3_3_2_42_2","unstructured":"Samsung. [n. d.]. PM1733\/PM1735 Enterprise SSD. https:\/\/semiconductor.samsung.com\/ssd\/enterprise-ssd\/pm1733-pm1735\/ Last accessed: 10\/12\/2024."},{"key":"e_1_3_3_2_43_2","unstructured":"Samsung. [n. d.]. PM9A3 Data center SSD. https:\/\/semiconductor.samsung.com\/ssd\/datacenter-ssd\/pm9a3\/ Last accessed: 21\/11\/2024."},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","unstructured":"Sagi Shahar Shai Bergman and Mark Silberstein. 2018. ActivePointers: A Case for Software Address Translation on GPUs. SIGOPS Oper. Syst. Rev. 52 1 (Aug. 2018) 84\u201395. 10.1145\/3273982.3273990","DOI":"10.1145\/3273982.3273990"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","unstructured":"Mark Silberstein Bryan Ford Idit Keidar and Emmett Witchel. 2014. GPUfs: Integrating a file system with GPUs. ACM Trans. Comput. Syst. 32 1 Article 1 (Feb. 2014) 31\u00a0pages. 10.1145\/2553081","DOI":"10.1145\/2553081"},{"key":"e_1_3_3_2_46_2","unstructured":"PassMark Software. [n. d.]. AMD EPYC 7402P vs EPYC 7702. https:\/\/www.cpubenchmark.net\/compare\/3591vs3719\/AMD-EPYC-7402P-vs-AMD-EPYC-7702 Last accessed: 02\/12\/2024."},{"key":"e_1_3_3_2_47_2","unstructured":"SPDK. [n. d.]. SPDK GitHub Repository. https:\/\/github.com\/spdk\/spdk Last accessed: 25\/11\/2024."},{"key":"e_1_3_3_2_48_2","unstructured":"SPDK. [n. d.]. Storage Performance Development Kit (SPDK). https:\/\/spdk.io\/ Last accessed: 05\/12\/2024."},{"key":"e_1_3_3_2_49_2","unstructured":"Adam Thompson and C.\u00a0J. Newburn. 2019. GPUDirect Storage: A Direct Path Between Storage and GPU Memory. https:\/\/developer.nvidia.com\/blog\/gpudirect-storage\/ Last accessed: 21\/11\/2024."},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","unstructured":"Lasse Thostrup Gloria Doci Nils Boeschen Manisha Luthra and Carsten Binnig. 2023. Distributed GPU Joins on Fast RDMA-capable Networks. Proc. ACM Manag. Data 1 1 Article 29 (May 2023) 26\u00a0pages. 10.1145\/3588709","DOI":"10.1145\/3588709"},{"key":"e_1_3_3_2_51_2","unstructured":"TPC. [n. d.]. Transaction Processing and Performance Council. https:\/\/www.tpc.org\/ Last accessed: 19\/05\/2025."},{"key":"e_1_3_3_2_52_2","unstructured":"xNVMe. [n. d.]. Cross-platform libraries and tools for NVMe devices. https:\/\/xnvme.io\/"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","unstructured":"Bobbi Yogatama Weiwei Gong and Xiangyao Yu. 2025. Scaling your Hybrid CPU-GPU DBMS to Multiple GPUs. Proc. VLDB Endow. 17 13 (Feb. 2025) 4709\u20134722. 10.14778\/3704965.3704977","DOI":"10.14778\/3704965.3704977"}],"event":{"name":"SIGMOD\/PODS '25: International Conference on Management of Data","location":"Berlin Germany","acronym":"DaMoN '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 21st International Workshop on Data Management on New Hardware"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3736227.3736232","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T09:17:34Z","timestamp":1752571054000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3736227.3736232"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":52,"alternative-id":["10.1145\/3736227.3736232","10.1145\/3736227"],"URL":"https:\/\/doi.org\/10.1145\/3736227.3736232","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]},"assertion":[{"value":"2025-07-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}