{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T19:16:42Z","timestamp":1772911002305,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,3,9]],"date-time":"2020-03-09T00:00:00Z","timestamp":1583712000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Israel Science Foundation","award":["1027\/18"],"award-info":[{"award-number":["1027\/18"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,3,9]]},"DOI":"10.1145\/3373376.3378528","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T22:37:01Z","timestamp":1584139021000},"page":"117-131","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":60,"title":["Lynx"],"prefix":"10.1145","author":[{"given":"Maroun","family":"Tork","sequence":"first","affiliation":[{"name":"Technion - Israel Institute of Technology, Haifa, Israel"}]},{"given":"Lina","family":"Maudlej","sequence":"additional","affiliation":[{"name":"Technion - Israel Institute of Technology, Haifa, Israel"}]},{"given":"Mark","family":"Silberstein","sequence":"additional","affiliation":[{"name":"Technion - Israel Institute of Technology, Haifa, Israel"}]}],"member":"320","published-online":{"date-parts":[[2020,3,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"Abadi Martin","year":"2016"},{"key":"e_1_3_2_1_2_1","volume-title":"ASPLOS '14","author":"Agrawal Sandeep R.","year":"2014"},{"key":"e_1_3_2_1_3_1","volume-title":"Face Description with Local Binary Patterns: Application to Face Recognition","author":"Ahonen Timo","year":"2006"},{"key":"e_1_3_2_1_4_1","unstructured":"Amazon Elastic Inference. [n.d.]. Amazon Elastic Inference: Add GPU acceleration to any Amazon EC2 instance for faster inference at much lower cost. https:\/\/aws.amazon.com\/machine-learning\/elastic-inference\/.  Amazon Elastic Inference. [n.d.]. Amazon Elastic Inference: Add GPU acceleration to any Amazon EC2 instance for faster inference at much lower cost. https:\/\/aws.amazon.com\/machine-learning\/elastic-inference\/."},{"key":"e_1_3_2_1_5_1","volume-title":"Pier Stanislao Paolucci, F. Pantaleo","author":"Ammendola Roberto","year":"2014"},{"key":"e_1_3_2_1_6_1","unstructured":"Cavium. [n.d.]. LiquidIO SmartNIC family of intelligent adapters provides high performance industry-leading programmable server adapter solutions for various data center deployments. https:\/\/www.marvell.com\/ethernet-adapters-and-controllers\/liquidio-smart-nics\/index.jsp .  Cavium. [n.d.]. LiquidIO SmartNIC family of intelligent adapters provides high performance industry-leading programmable server adapter solutions for various data center deployments. https:\/\/www.marvell.com\/ethernet-adapters-and-controllers\/liquidio-smart-nics\/index.jsp ."},{"key":"e_1_3_2_1_7_1","volume-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018"},{"key":"e_1_3_2_1_8_1","first-page":"1","article-title":"GPUrdma: GPU-side Library for High Performance Networking from GPU Kernels. ACM, New York","volume":"6","author":"Daoud Feras","year":"2016","journal-title":"NY, USA"},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the 33rd International Conference on International Conference on Machine Learning -","volume":"48","author":"Diamos Gregory","year":"2016"},{"key":"e_1_3_2_1_10_1","unstructured":"Dotan Barak. [n.d.] a. RDMAmojo -- blog on RDMA technology and programming. https:\/\/www.rdmamojo.com\/2013\/06\/01\/which-queue-pair-type-to-use\/.  Dotan Barak. [n.d.] a. RDMAmojo -- blog on RDMA technology and programming. https:\/\/www.rdmamojo.com\/2013\/06\/01\/which-queue-pair-type-to-use\/."},{"key":"e_1_3_2_1_11_1","unstructured":"Dotan Barak. [n.d.] b. RDMAmojo -- blog on RDMA technology and programming. https:\/\/www.rdmamojo.com\/2013\/01\/26\/ibv_post_send\/.  Dotan Barak. [n.d.] b. RDMAmojo -- blog on RDMA technology and programming. https:\/\/www.rdmamojo.com\/2013\/01\/26\/ibv_post_send\/."},{"key":"e_1_3_2_1_12_1","volume-title":"NICA: OS Support for Near-data Network Application Accelerators. In International Workshop on Multi-core and Rack-scale Systems (MARS17)","author":"Eran Hagai","year":"2017"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Alireza Farshin Amir Roozbeh Gerald Q. Maguire  Jr and Dejan Kostic. 2019. Make the Most out of Last Level Cache in Intel Processors (EuroSys '19). https:\/\/people.kth.se\/ farshin\/documents\/slice-aware-eurosys19.pdf  Alireza Farshin Amir Roozbeh Gerald Q. Maguire Jr and Dejan Kostic. 2019. Make the Most out of Last Level Cache in Intel Processors (EuroSys '19). https:\/\/people.kth.se\/ farshin\/documents\/slice-aware-eurosys19.pdf","DOI":"10.1145\/3302424.3303977"},{"key":"e_1_3_2_1_14_1","volume-title":"Azure Accelerated Networking: SmartNICs in the Public Cloud. In 15th USENIX Symposium on Networked Systems Design and Implementation (NSDI 18)","author":"Firestone Daniel","year":"2018"},{"key":"e_1_3_2_1_15_1","unstructured":"Google AutoML. [n.d.]. AutoML: Train high-quality custom machine learning models with minimal effort and machine learning expertise. https:\/\/cloud.google.com\/automl\/.  Google AutoML. [n.d.]. AutoML: Train high-quality custom machine learning models with minimal effort and machine learning expertise. https:\/\/cloud.google.com\/automl\/."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.51"},{"key":"e_1_3_2_1_17_1","unstructured":"Habana. [n.d.]. Goya deep learning inference accelerator: White paper. https:\/\/habana.ai\/wp-content\/uploads\/2019\/06\/Goya-Whitepaper-Inference-Performance.pdf .  Habana. [n.d.]. Goya deep learning inference accelerator: White paper. https:\/\/habana.ai\/wp-content\/uploads\/2019\/06\/Goya-Whitepaper-Inference-Performance.pdf ."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2806777.2806836"},{"key":"e_1_3_2_1_19_1","unstructured":"Huawei. [n.d.]. FPGA-Accelerated Cloud Server. https:\/\/www.huaweicloud.com\/en-us\/product\/fcs.html .  Huawei. [n.d.]. FPGA-Accelerated Cloud Server. https:\/\/www.huaweicloud.com\/en-us\/product\/fcs.html ."},{"key":"e_1_3_2_1_20_1","unstructured":"Intel. [n.d.] a. Intel\u00ae Software Guard Extensions (Intel\u00ae SGX). https:\/\/www.intel.com\/content\/www\/us\/en\/architecture-and-technology\/software-guard-extensions.html .  Intel. [n.d.] a. Intel\u00ae Software Guard Extensions (Intel\u00ae SGX). https:\/\/www.intel.com\/content\/www\/us\/en\/architecture-and-technology\/software-guard-extensions.html ."},{"key":"e_1_3_2_1_21_1","unstructured":"Intel. [n.d.] b. Intel\u00ae Visual Compute Accelerator (Intel\u00ae VCA) Product Brief. https:\/\/www.intel.com\/content\/www\/us\/en\/servers\/media-and-graphics\/visual-compute-accelerator-brief.html .  Intel. [n.d.] b. Intel\u00ae Visual Compute Accelerator (Intel\u00ae VCA) Product Brief. https:\/\/www.intel.com\/content\/www\/us\/en\/servers\/media-and-graphics\/visual-compute-accelerator-brief.html ."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 8th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2011","author":"Jang Keon","year":"2011"},{"key":"e_1_3_2_1_23_1","unstructured":"Jonathon Phillips. [n.d.]. color FERET Database. https:\/\/www.nist.gov\/itl\/iad\/image-group\/color-feret-database .  Jonathon Phillips. [n.d.]. color FERET Database. https:\/\/www.nist.gov\/itl\/iad\/image-group\/color-feret-database ."},{"key":"e_1_3_2_1_24_1","volume-title":"FlexNIC: Rethinking Network DMA. In 15th Workshop on Hot Topics in Operating Systems, HotOS XV","author":"Kaufmann Antoine","year":"2015"},{"key":"e_1_3_2_1_25_1","first-page":"1","article-title":"NBA (Network Balancing Act)","volume":"22","author":"Kim Joongi","year":"2015","journal-title":"A High-performance Packet Processing Framework for Heterogeneous Processors. ACM"},{"key":"e_1_3_2_1_26_1","volume-title":"GPUnet: Networking Abstractions for GPU Programs. In 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14)","author":"Kim Sangman","year":"2014"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the IEEE, november 1998 . http:\/\/yann.lecun.com\/exdb\/publis\/pdf\/lecun-01a.pdf","author":"LeCun Yann"},{"key":"e_1_3_2_1_28_1","volume-title":"2nd Asia-Pacific Workshop on Networking (APNet","author":"Luo Layong Larry","year":"2018"},{"key":"e_1_3_2_1_29_1","unstructured":"Mellanox Technologies. [n.d.] a. BlueField SmartNIC . http:\/\/www.mellanox.com\/page\/products_dyn?product_family=275&mtag=bluefield_smart_nic .  Mellanox Technologies. [n.d.] a. BlueField SmartNIC . http:\/\/www.mellanox.com\/page\/products_dyn?product_family=275&mtag=bluefield_smart_nic ."},{"key":"e_1_3_2_1_30_1","unstructured":"Mellanox Technologies. [n.d.] b. Mellanox OpenFabrics Enterprise Distribution for Linux (MLNX_OFED). http:\/\/www.mellanox.com\/page\/products_dyn?product_family=26 .  Mellanox Technologies. [n.d.] b. Mellanox OpenFabrics Enterprise Distribution for Linux (MLNX_OFED). http:\/\/www.mellanox.com\/page\/products_dyn?product_family=26 ."},{"key":"e_1_3_2_1_31_1","unstructured":"Mellanox Technologies. 2018a. libvma: Linux user-space library for network socket acceleration based on RDMA compatible network adaptors. https:\/\/github.com\/Mellanox\/libvma .  Mellanox Technologies. 2018a. libvma: Linux user-space library for network socket acceleration based on RDMA compatible network adaptors. https:\/\/github.com\/Mellanox\/libvma ."},{"key":"e_1_3_2_1_32_1","unstructured":"Mellanox Technologies. 2018b. sockperf: Network Benchmarking Utility. https:\/\/github.com\/Mellanox\/sockperf .  Mellanox Technologies. 2018b. sockperf: Network Benchmarking Utility. https:\/\/github.com\/Mellanox\/sockperf ."},{"key":"e_1_3_2_1_33_1","unstructured":"Microsoft Brainwave. [n.d.]. Brainwave: a deep learning platform for real-time AI serving in the cloud. https:\/\/www.microsoft.com\/en-us\/research\/project\/project-brainwave\/.  Microsoft Brainwave. [n.d.]. Brainwave: a deep learning platform for real-time AI serving in the cloud. https:\/\/www.microsoft.com\/en-us\/research\/project\/project-brainwave\/."},{"key":"e_1_3_2_1_34_1","unstructured":"Microsoft Catapult. [n.d.]. Microsoft Catapult: Transforming cloud computing by augmenting CPUs with an interconnected and configurable compute layer composed of programmable silicon. https:\/\/www.microsoft.com\/en-us\/research\/project\/project-catapult\/.  Microsoft Catapult. [n.d.]. Microsoft Catapult: Transforming cloud computing by augmenting CPUs with an interconnected and configurable compute layer composed of programmable silicon. https:\/\/www.microsoft.com\/en-us\/research\/project\/project-catapult\/."},{"key":"e_1_3_2_1_35_1","unstructured":"Nguyen Khang T. [n.d.]. Introduction to Cache Allocation Technology in the Intel\u00ae Xeon\u00ae Processor E5 v4 Family. https:\/\/software.intel.com\/en-us\/articles\/introduction-to-cache-allocation-technology .  Nguyen Khang T. [n.d.]. Introduction to Cache Allocation Technology in the Intel\u00ae Xeon\u00ae Processor E5 v4 Family. https:\/\/software.intel.com\/en-us\/articles\/introduction-to-cache-allocation-technology ."},{"key":"e_1_3_2_1_36_1","unstructured":"NVIDIA. [n.d.] a. CUDA Dynamic Parallelism API and Principles. https:\/\/devblogs.nvidia.com\/cuda-dynamic-parallelism-api-principles\/.  NVIDIA. [n.d.] a. CUDA Dynamic Parallelism API and Principles. https:\/\/devblogs.nvidia.com\/cuda-dynamic-parallelism-api-principles\/."},{"key":"e_1_3_2_1_37_1","unstructured":"NVIDIA. [n.d.] b. A fast GPU memory copy library based on NVIDIA GPUDirect RDMA technology. https:\/\/github.com\/NVIDIA\/gdrcopy .  NVIDIA. [n.d.] b. A fast GPU memory copy library based on NVIDIA GPUDirect RDMA technology. https:\/\/github.com\/NVIDIA\/gdrcopy ."},{"key":"e_1_3_2_1_38_1","unstructured":"NVIDIA. [n.d.] c. GPUDirect RDMA: Developing a Linux Kernel Module using GPUDirect RDMA . https:\/\/docs.nvidia.com\/cuda\/gpudirect-rdma\/index.html .  NVIDIA. [n.d.] c. GPUDirect RDMA: Developing a Linux Kernel Module using GPUDirect RDMA . https:\/\/docs.nvidia.com\/cuda\/gpudirect-rdma\/index.html ."},{"key":"e_1_3_2_1_39_1","unstructured":"NVSHMEM. [n.d.]. GPU-side API for remote data access collectives and synchronization. http:\/\/www.openshmem.org\/site\/sites\/default\/site_files\/SC2017-BOF-NVIDIA.pdf .  NVSHMEM. [n.d.]. GPU-side API for remote data access collectives and synchronization. http:\/\/www.openshmem.org\/site\/sites\/default\/site_files\/SC2017-BOF-NVIDIA.pdf ."},{"key":"e_1_3_2_1_40_1","volume-title":"InfiniBand Verbs on GPU: a case study of controlling an InfiniBand network device from the GPU . IJHPCA","author":"Oden Lena","year":"2017"},{"key":"e_1_3_2_1_41_1","volume-title":"Floem: A Programming System for NIC-Accelerated Network Applications. In 13th USENIX Symposium on Operating Systems Design and Implementation, OSDI 2018","author":"Phothilimthana Phitchaya Mangpo","year":"2018"},{"key":"e_1_3_2_1_42_1","volume-title":"GPU-Centric Communication on NVIDIA GPU Clusters with InfiniBand: A Case Study with OpenSHMEM. In 24th IEEE International Conference on High Performance Computing, HiPC 2017","author":"Potluri Sreeram","year":"2017"},{"key":"e_1_3_2_1_43_1","volume-title":"Efficient Inter-node MPI Communication Using GPUDirect RDMA for InfiniBand Clusters with NVIDIA GPUs. In 42nd International Conference on Parallel Processing, ICPP 2013","author":"Potluri Sreeram","year":"2013"},{"key":"e_1_3_2_1_44_1","unstructured":"Davide Rossetti and Elena Agostini. [n.d.]. How to make your life easier in the age of exascale computing using NVIDIA GPUDirect technologies. https:\/\/developer.download.nvidia.com\/video\/gputechconf\/gtc\/2019\/presentation\/s9653-how-to-make-your-life-easier-in-the-age-of-exascale-computing-using-nvidia-gpudirect-technologies.pdf .  Davide Rossetti and Elena Agostini. [n.d.]. How to make your life easier in the age of exascale computing using NVIDIA GPUDirect technologies. https:\/\/developer.download.nvidia.com\/video\/gputechconf\/gtc\/2019\/presentation\/s9653-how-to-make-your-life-easier-in-the-age-of-exascale-computing-using-nvidia-gpudirect-technologies.pdf ."},{"key":"e_1_3_2_1_45_1","unstructured":"Selectel. 2018. FPGA-acce\u00adle\u00adra\u00adtors go into the clouds [Russian]. https:\/\/blog.selectel.ru\/fpga-uskoriteli-uxodyat-v-oblaka\/.  Selectel. 2018. FPGA-acce\u00adle\u00adra\u00adtors go into the clouds [Russian]. https:\/\/blog.selectel.ru\/fpga-uskoriteli-uxodyat-v-oblaka\/."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2017.19"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2553081"},{"key":"e_1_3_2_1_48_1","unstructured":"TensorFlow Light manual. [n.d.]. TensorFlow Light Delegates. https:\/\/www.tensorflow.org\/lite\/performance\/delegates .  TensorFlow Light manual. [n.d.]. TensorFlow Light Delegates. https:\/\/www.tensorflow.org\/lite\/performance\/delegates ."},{"key":"e_1_3_2_1_49_1","volume-title":"GASPP: A GPU-Accelerated Stateful Packet Processing Framework. In 2014 USENIX Annual Technical Conference, USENIX ATC '14","author":"Vasiliadis Giorgos","year":"2014"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2019.00027"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3190508.3190511"},{"key":"e_1_3_2_1_52_1","unstructured":"Wang Xu. 2018. Hardware Acceleration over NFV in China Mobile . https:\/\/wiki.opnfv.org\/download\/attachments\/20745096\/opnfv_Acc.pdf?version=1&modificationDate=1528124448000&api=v2 .  Wang Xu. 2018. Hardware Acceleration over NFV in China Mobile . https:\/\/wiki.opnfv.org\/download\/attachments\/20745096\/opnfv_Acc.pdf?version=1&modificationDate=1528124448000&api=v2 ."},{"key":"e_1_3_2_1_53_1","unstructured":"Yann LeCun. [n.d.]. THE MNIST DATABASE of handwritten digits. http:\/\/yann.lecun.com\/exdb\/mnist\/.  Yann LeCun. [n.d.]. THE MNIST DATABASE of handwritten digits. http:\/\/yann.lecun.com\/exdb\/mnist\/."}],"event":{"name":"ASPLOS '20: Architectural Support for Programming Languages and Operating Systems","location":"Lausanne Switzerland","acronym":"ASPLOS '20","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378528","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3373376.3378528","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:38:16Z","timestamp":1750199896000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373376.3378528"}},"subtitle":["A SmartNIC-driven Accelerator-centric Architecture for Network Servers"],"short-title":[],"issued":{"date-parts":[[2020,3,9]]},"references-count":53,"alternative-id":["10.1145\/3373376.3378528","10.1145\/3373376"],"URL":"https:\/\/doi.org\/10.1145\/3373376.3378528","relation":{},"subject":[],"published":{"date-parts":[[2020,3,9]]},"assertion":[{"value":"2020-03-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}