{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T20:59:01Z","timestamp":1775854741167,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:00:00Z","timestamp":1755820800000},"content-version":"vor","delay-in-days":75,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2321123,2340982"],"award-info":[{"award-number":["2321123,2340982"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000015","name":"DOE U.S. Department of Energy","doi-asserted-by":"publisher","award":["DE-SC0024207"],"award-info":[{"award-number":["DE-SC0024207"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,8]]},"DOI":"10.1145\/3721145.3725780","type":"proceedings-article","created":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T12:57:17Z","timestamp":1755867437000},"page":"807-821","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Understanding the Idiosyncrasies of Emerging BlueField DPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-0941-1295","authenticated-orcid":false,"given":"Arjun","family":"Kashyap","sequence":"first","affiliation":[{"name":"University of California, Merced, Merced, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5806-7228","authenticated-orcid":false,"given":"Yuke","family":"Li","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9774-5514","authenticated-orcid":false,"given":"Darren","family":"Ng","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7581-8905","authenticated-orcid":false,"given":"Xiaoyi","family":"Lu","sequence":"additional","affiliation":[{"name":"University of California, Merced, Merced, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,8,22]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"2025. SMhasher. https:\/\/github.com\/rurban\/smhasher."},{"key":"e_1_3_3_1_3_2","unstructured":"2025. Tinymembench. https:\/\/github.com\/ssvb\/tinymembench."},{"key":"e_1_3_3_1_4_2","unstructured":"AMD. 2025. AMD Pensando. https:\/\/www.amd.com\/en\/products\/accelerators\/pensando.html."},{"key":"e_1_3_3_1_5_2","unstructured":"ARM. 2025. Neon. https:\/\/developer.arm.com\/Architectures\/Neon."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522302"},{"key":"e_1_3_3_1_7_2","first-page":"281","volume-title":"2016 USENIX Annual Technical Conference (USENIX ATC 16)","author":"Breslow Alex\u00a0D.","year":"2016","unstructured":"Alex\u00a0D. Breslow, Dong\u00a0Ping Zhang, Joseph\u00a0L. Greathouse, Nuwan Jayasena, and Dean\u00a0M. Tullsen. 2016. Horton Tables: Fast Hash Tables for In-Memory Data-Intensive Computing. In 2016 USENIX Annual Technical Conference (USENIX ATC 16). USENIX Association, Denver, CO, 281\u2013294. https:\/\/www.usenix.org\/conference\/atc16\/technical-sessions\/presentation\/breslow"},{"key":"e_1_3_3_1_8_2","unstructured":"Broadcom. 2018. Stingray\u2122 PS225 SmartNIC Adapters. https:\/\/datasheet.octopart.com\/PS225\u2014DUAL-PORT-25GBE-PCIE-ETHERNET-SMARTNIC-Avago-datasheet-116220963.pdf."},{"key":"e_1_3_3_1_9_2","unstructured":"Xuzheng Chen Jie Zhang Ting Fu Yifan Shen Shu Ma Kun Qian Lingjun Zhu Chao Shi Yin Zhang Ming Liu and Zeke Wang. 2024. Demystifying Datapath Accelerator Enhanced Off-path SmartNIC. arxiv:https:\/\/arXiv.org\/abs\/2402.03041\u00a0[cs.NI] https:\/\/arxiv.org\/abs\/2402.03041"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Biplob Debnath Sudipta Sengupta and Jin Li. 2010. FlashStore: High Throughput Persistent Key-Value Store. Proc. VLDB Endow. 3 1\u20132 (sep 2010) 1414\u20131425. https:\/\/doi.org\/10.14778\/1920841.1921015","DOI":"10.14778\/1920841.1921015"},{"key":"e_1_3_3_1_11_2","unstructured":"Jon Dugan Seth Elliott Bruce\u00a0A. Mah Jeff Poskanzer and Kaustubh Prabhu. [n. d.]. iPerf - The Ultimate Speed Test Tool for TCP UDP and SCTP. https:\/\/iperf.fr\/."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2017.49"},{"key":"e_1_3_3_1_13_2","unstructured":"Shay Gueron and Michael\u00a0E Kounavis. 2010. Intel\u00ae Carry-Less Multiplication Instruction and its Usage for Computing the GCM Mode. White Paper (2010) 10."},{"key":"e_1_3_3_1_14_2","unstructured":"Intel. [n. d.]. DPDK based Packet Generator. https:\/\/github.com\/pktgen\/Pktgen-DPDK."},{"key":"e_1_3_3_1_15_2","unstructured":"Intel. 2016. Open vSwitch* with DPDK Overview. https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/technical\/open-vswitch-with-dpdk-overview.html."},{"key":"e_1_3_3_1_16_2","unstructured":"Intel. 2024. Poll Mode Driver. https:\/\/doc.dpdk.org\/guides\/prog_guide\/poll_mode_drv.html."},{"key":"e_1_3_3_1_17_2","unstructured":"Intel. 2025. Intel\u00ae Advanced Vector Extensions 512. https:\/\/www.intel.com\/content\/www\/us\/en\/architecture-and-technology\/avx-512-overview.html."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2015.79"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI52880.2021.00017"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Anuj Kalia Michael Kaminsky and David\u00a0G. Andersen. 2014. Using RDMA Efficiently For Key-Value Services. SIGCOMM Comput. Commun. Rev. 44 4 (aug 2014) 295\u2013306. https:\/\/doi.org\/10.1145\/2740070.2626299","DOI":"10.1145\/2740070.2626299"},{"key":"e_1_3_3_1_21_2","first-page":"437","volume-title":"2016 USENIX Annual Technical Conference (USENIX ATC 16)","author":"Kalia Anuj","year":"2016","unstructured":"Anuj Kalia, Michael Kaminsky, and David\u00a0G. Andersen. 2016. Design Guidelines for High Performance RDMA Systems. In 2016 USENIX Annual Technical Conference (USENIX ATC 16). USENIX Association, Denver, CO, 437\u2013450. https:\/\/www.usenix.org\/conference\/atc16\/technical-sessions\/presentation\/kalia"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00063"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3477132.3483565"},{"key":"e_1_3_3_1_24_2","first-page":"12","volume-title":"2nd Greater Chicago Area System Research Workshop (GCASR)","author":"Li Tonglin","year":"2013","unstructured":"Tonglin Li, Xiaobing Zhou, Kevin Brandstatter, and Ioan Raicu. 2013. Distributed Key-Value Store on HPC and Cloud Systems. In 2nd Greater Chicago Area System Research Workshop (GCASR) , Vol.\u00a0238. 12."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00040"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI59126.2023.00019"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Yuke Li Arjun Kashyap Yanfei Guo and Xiaoyi Lu. 2024. Compression Analysis for BlueField-2\/-3 Data Processing Units: Lossy and Lossless Perspectives. IEEE Micro 44 02 (March 2024) 8\u201319. https:\/\/doi.org\/10.1109\/MM.2023.3343636","DOI":"10.1109\/MM.2023.3343636"},{"key":"e_1_3_3_1_28_2","unstructured":"Hyeontaek Lim. 2024. MICA2. https:\/\/github.com\/efficient\/mica2."},{"key":"e_1_3_3_1_29_2","first-page":"429","volume-title":"11th USENIX Symposium on Networked Systems Design and Implementation (NSDI 14)","author":"Lim Hyeontaek","year":"2014","unstructured":"Hyeontaek Lim, Dongsu Han, David\u00a0G. Andersen, and Michael Kaminsky. 2014. MICA: A Holistic Approach to Fast In-Memory Key-Value Storage. In 11th USENIX Symposium on Networked Systems Design and Implementation (NSDI 14). USENIX Association, Seattle, WA, 429\u2013444. https:\/\/www.usenix.org\/conference\/nsdi14\/technical-sessions\/presentation\/lim"},{"key":"e_1_3_3_1_30_2","unstructured":"Marvell. 2025. Marvell LiquidIO III. https:\/\/www.marvell.com\/content\/dam\/marvell\/en\/public-collateral\/embedded-processors\/marvell-liquidio-III-solutions-brief.pdf."},{"key":"e_1_3_3_1_31_2","unstructured":"MARVELL. 2025. Marvell\u00ae LiquidIO\u2122 III. https:\/\/www.marvell.com\/content\/dam\/marvell\/en\/public-collateral\/embedded-processors\/marvell-liquidio-III-solutions-brief.pdf."},{"key":"e_1_3_3_1_32_2","unstructured":"MARVELL. 2025. Marvell\u00ae OCTEON 10 DPU Platform. https:\/\/www.marvell.com\/content\/dam\/marvell\/en\/public-collateral\/embedded-processors\/marvell-octeon-10-dpu-platform-product-brief.pdf."},{"key":"e_1_3_3_1_33_2","unstructured":"John\u00a0D. McCalpin. 2025. STREAM: Sustainable Memory Bandwidth in High Performance Computers. https:\/\/www.cs.virginia.edu\/stream\/."},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3569951.3593595"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/HOTI59126.2023.00020"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230560"},{"key":"e_1_3_3_1_37_2","unstructured":"NVDIA. 2025. RDMA Stack Support on Host and Arm System. https:\/\/docs.nvidia.com\/networking\/display\/bluefielddpuosv393\/rdma+stack+support+on+host+and+arm+system."},{"key":"e_1_3_3_1_38_2","unstructured":"NVIDIA. 2020. NVIDIA Mellanox BlueField Data Processing Unit (DPU. https:\/\/network.nvidia.com\/sites\/default\/files\/doc-2020\/pb-bluefield-dpu.pdf."},{"key":"e_1_3_3_1_39_2","unstructured":"NVIDIA. 2023. NVIDIA BLUEFIELD-2 DPU. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/documents\/datasheet-nvidia-bluefield-2-dpu.pdf."},{"key":"e_1_3_3_1_40_2","unstructured":"NVIDIA. 2024. NVIDIA BLUEFIELD-3 DPU. https:\/\/www.nvidia.com\/content\/dam\/en-zz\/Solutions\/Data-Center\/documents\/datasheet-nvidia-bluefield-3-dpu.pdf."},{"key":"e_1_3_3_1_41_2","unstructured":"NVIDIA. 2024. NVIDIA DOCA Software Framework. https:\/\/developer.nvidia.com\/networking\/doca."},{"key":"e_1_3_3_1_42_2","unstructured":"NVIDIA. 2025. DPA Subsystem. https:\/\/docs.nvidia.com\/doca\/sdk\/dpa+subsystem\/index.html."},{"key":"e_1_3_3_1_43_2","unstructured":"NVIDIA. 2025. NVIDIA BlueField DPU Modes of Operation. https:\/\/docs.nvidia.com\/doca\/sdk\/bluefield+modes+of+operation\/index.html."},{"key":"e_1_3_3_1_44_2","unstructured":"OFED. 2024. Infiniband Verbs Performance Tests. https:\/\/github.com\/linux-rdma\/perftest."},{"key":"e_1_3_3_1_45_2","unstructured":"A\u00a0Linux Foundation\u00a0Collaborative Project. 2016. Open vSwitch. https:\/\/www.openvswitch.org\/."},{"key":"e_1_3_3_1_46_2","unstructured":"A\u00a0Linux Foundation\u00a0Collaborative Project. 2016. Open vSwitch with DPDK. https:\/\/docs.openvswitch.org\/en\/latest\/intro\/install\/dpdk\/."},{"key":"e_1_3_3_1_47_2","unstructured":"Jeffrey\u00a0Keith Rott. 2012. Intel\u00ae Advanced Encryption Standard Instructions (AES-NI). https:\/\/www.intel.com\/content\/www\/us\/en\/developer\/articles\/technical\/advanced-encryption-standard-instructions-aes-ni.html."},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00022"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2017.39"},{"key":"e_1_3_3_1_50_2","first-page":"987","volume-title":"17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23)","author":"Wei Xingda","year":"2023","unstructured":"Xingda Wei, Rongxin Cheng, Yuhan Yang, Rong Chen, and Haibo Chen. 2023. Characterizing Off-path SmartNIC for Accelerating Distributed Systems. In 17th USENIX Symposium on Operating Systems Design and Implementation (OSDI 23). USENIX Association, Boston, MA, 987\u20131004. https:\/\/www.usenix.org\/conference\/osdi23\/presentation\/wei-smartnic"},{"key":"e_1_3_3_1_51_2","unstructured":"Xilinx. [n. d.]. Alveo: Adaptable Accelerator Cards for Data Center Workloads. https:\/\/www.xilinx.com\/products\/boards-and-kits\/alveo.html."},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/3546591.3547528"},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3465998.3466002"}],"event":{"name":"ICS '25: 2025 International Conference on Supercomputing","location":"Salt Lake City USA","acronym":"ICS '25","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 39th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721145.3725780","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721145.3725780","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:04:54Z","timestamp":1755867894000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721145.3725780"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,8]]},"references-count":52,"alternative-id":["10.1145\/3721145.3725780","10.1145\/3721145"],"URL":"https:\/\/doi.org\/10.1145\/3721145.3725780","relation":{},"subject":[],"published":{"date-parts":[[2025,6,8]]},"assertion":[{"value":"2025-08-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}