{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T23:28:48Z","timestamp":1777937328209,"version":"3.51.4"},"reference-count":52,"publisher":"Association for Computing Machinery (ACM)","issue":"3","license":[{"start":{"date-parts":[[2017,9,17]],"date-time":"2017-09-17T00:00:00Z","timestamp":1505606400000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-1017785 and CCF-1333594"],"award-info":[{"award-number":["CNS-1017785 and CCF-1333594"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003977","name":"Israel Science Foundation","doi-asserted-by":"crossref","award":["1138\/14"],"award-info":[{"award-number":["1138\/14"]}],"id":[{"id":"10.13039\/501100003977","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Israeli Ministry of Science"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["ACM Trans. Comput. Syst."],"published-print":{"date-parts":[[2016,9,17]]},"abstract":"<jats:p>Despite the popularity of GPUs in high-performance and scientific computing, and despite increasingly general-purpose hardware capabilities, the use of GPUs in network servers or distributed systems poses significant challenges.<\/jats:p>\n                  <jats:p>GPUnet is a native GPU networking layer that provides a socket abstraction and high-level networking APIs for GPU programs. 
We use GPUnet to streamline the development of high-performance, distributed applications like in-GPU-memory MapReduce and a new class of low-latency, high-throughput GPU-native network services such as a face verification server.<\/jats:p>","DOI":"10.1145\/2963098","type":"journal-article","created":{"date-parts":[[2016,9,19]],"date-time":"2016-09-19T16:11:45Z","timestamp":1474301505000},"page":"1-31","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":33,"title":["GPUnet"],"prefix":"10.1145","volume":"34","author":[{"given":"Mark","family":"Silberstein","sequence":"first","affiliation":[{"name":"Technion\u2014Israel Institute of Technology"}]},{"given":"Sangman","family":"Kim","sequence":"additional","affiliation":[{"name":"University of Texas at Austin"}]},{"given":"Seonggu","family":"Huh","sequence":"additional","affiliation":[{"name":"University of Texas at Austin"}]},{"given":"Xinya","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Texas at Austin"}]},{"given":"Yige","family":"Hu","sequence":"additional","affiliation":[{"name":"University of Texas at Austin"}]},{"given":"Amir","family":"Wated","sequence":"additional","affiliation":[{"name":"Technion\u2014Israel Institute of Technology"}]},{"given":"Emmett","family":"Witchel","sequence":"additional","affiliation":[{"name":"University of Texas at Austin"}]}],"member":"320","published-online":{"date-parts":[[2016,9,17]]},"reference":[{"key":"e_1_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541956"},{"key":"e_1_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2006.244"},{"key":"e_1_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/396\/4\/042059"},{"key":"e_1_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1629575.1629579"},{"key":"e_1_2_1_5_1","volume-title":"Christopher R. Johnson, Harshad Kasture, Filippo Sironi, Anant Agarwal, M. 
Frans Kaashoek, and Nickolai Zeldovich.","author":"Beckmann Nathan Z.","year":"2014","unstructured":"Nathan Z. Beckmann, Charles Gruenwald III, Christopher R. Johnson, Harshad Kasture, Filippo Sironi, Anant Agarwal, M. Frans Kaashoek, and Nickolai Zeldovich. 2014. PIKA: A Network Service for Multikernel Operating Systems. Technical Report MIT-CSAIL-TR-2014-002. Massachusetts Institute of Technology, Cambridge, MA. http:\/\/hdl.handle.net\/1721.1\/84608."},{"key":"e_1_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1879141.1879175"},{"key":"e_1_2_1_7_1","volume-title":"Proceedings of the 30th International Conference on Machine Learning (ICML-13)","author":"Coates Adam","year":"2013","unstructured":"Adam Coates, Brody Huval, Tao Wang, David Wu, Bryan Catanzaro, and Ng Andrew. 2013. Deep learning with COTS HPC systems. In Proceedings of the 30th International Conference on Machine Learning (ICML-13). 1337--1345."},{"key":"e_1_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2931088.2931091"},{"key":"e_1_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/1251254.1251264"},{"key":"e_1_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1282380.1282421"},{"key":"e_1_2_1_11_1","volume-title":"Retrieved","author":"Gupta Shalini","year":"2013","unstructured":"Shalini Gupta. 2013. Efficient Object Detection on GPUs Using MB-LBP Features and Random Forests. Retrieved August 21, 2016, from http:\/\/on-demand.gputechconf.com\/gtc\/2013\/presentations\/S3297-Efficient-Object-Detection-GPU-MB-LBP-Forest.pdf."},{"key":"e_1_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1851275.1851207"},{"key":"e_1_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/2387880.2387894"},{"key":"e_1_2_1_14_1","unstructured":"Sean Hefty. 2012. Rsockets. 
Available at https:\/\/www.openfabrics.org\/index.php\/resources\/document-downloads\/public-documents\/doc_download\/495-rsockets.html."},{"issue":"2","key":"e_1_2_1_15_1","first-page":"1","article-title":"InfiniBand Architecture Specification, Volume 1\u2014General Specification","volume":"1","author":"InfiniBand Trade Association","year":"2007","unstructured":"InfiniBand Trade Association. 2007. InfiniBand Architecture Specification, Volume 1\u2014General Specification, Release 1.2.1. InfiniBand Trade Association.","journal-title":"Release"},{"key":"e_1_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/1972457.1972459"},{"key":"e_1_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.5555\/2523721.2523758"},{"key":"e_1_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2502524.2502548"},{"key":"e_1_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/2002181.2002183"},{"key":"e_1_2_1_20_1","volume-title":"Retrieved","author":"Khronos Group","year":"2016","unstructured":"Khronos Group. 2016. OpenCL: The Open Standard for Parallel Programming of Heterogeneous Systems. Retrieved August 21, 2016, from http:\/\/www.khronos.org\/opencl."},{"key":"e_1_2_1_21_1","doi-asserted-by":"publisher","unstructured":"David B. Kirk and W. Hwu Wen-mei. 2010. Programming Massively Parallel Processors: A Hands-on Approach. Morgan Kaufmann.","DOI":"10.5555\/1841511"},{"key":"e_1_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/1364385.1364392"},{"key":"e_1_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541975"},{"key":"e_1_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:IJPP.0000029272.69895.c1"},{"key":"e_1_2_1_25_1","volume-title":"Retrieved","author":"NVIDIA.","year":"2015","unstructured":"NVIDIA. 2015. Developing a Linux Kernel Module Using GPUDirect RDMA. 
Retrieved August 21, 2016, from http:\/\/docs.nvidia.com\/cuda\/gpudirect-rdma\/index.html."},{"key":"e_1_2_1_26_1","volume-title":"Retrieved","author":"NVIDIA.","year":"2016","unstructured":"NVIDIA. 2016. GPU Applications. Retrieved August 21, 2016, from http:\/\/www.nvidia.com\/object\/gpu-applications.html."},{"key":"e_1_2_1_27_1","unstructured":"Ohio State University Network-Based Computing Laboratory. 2015. MVAPICH2: High Performance MPI over InfiniBand iWARP and RoCE. http:\/\/mvapich.cse.ohio-state.edu. (2015)."},{"key":"e_1_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1713254.1713276"},{"key":"e_1_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503288"},{"key":"e_1_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2013.17"},{"key":"e_1_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2391229.2391242"},{"key":"e_1_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2043556.2043579"},{"key":"e_1_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522715"},{"key":"e_1_2_1_34_1","volume-title":"Retrieved","author":"Rossetti Davide","year":"2016","unstructured":"Davide Rossetti, Sreeram Potluri, and David Fontaine. 2016. State of GPUdirect Technologies. Retrieved August 21, 2016, from http:\/\/on-demand.gputechconf.com\/gtc\/2016\/presentation\/s6264-davide-rossetti-GPUDirect.pdf."},{"key":"e_1_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/1855840.1855845"},{"key":"e_1_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451169"},{"key":"e_1_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2553081"},{"key":"e_1_2_1_38_1","volume-title":"Retrieved","author":"Silberstein Mark","year":"2014","unstructured":"Mark Silberstein, Sangman Kim, Seonggu Huh, Xinya Zhang, Yige Hu, Amir Wated, and Emmett Witchel. 2014b. GPUnet: Networking Abstractions for GPU Programs. 
Retrieved August 21, 2016, from https:\/\/sites.google.com\/site\/silbersteinmark\/GPUnet."},{"key":"e_1_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/161724"},{"key":"e_1_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.5555\/940269"},{"key":"e_1_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.102"},{"key":"e_1_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/2537857.2537861"},{"key":"e_1_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/1996092.1996095"},{"key":"e_1_2_1_44_1","volume-title":"Retrieved","author":"Taneja Group Technology Analysts.","year":"2012","unstructured":"Taneja Group Technology Analysts. 2012. InfiniBand Data Center March. Retrieved August 21, 2016, from https:\/\/cw.infinibandta.org\/document\/dl\/7269."},{"key":"e_1_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2535372.2535412"},{"key":"e_1_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.5555\/2643634.2643668"},{"key":"e_1_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/2391229.2391237"},{"key":"e_1_2_1_48_1","volume-title":"Retrieved","author":"Volkov Vasily","year":"2010","unstructured":"Vasily Volkov. 2010. Better Performance at Lower Occupancy. Retrieved August 21, 2016, from http:\/\/www.cs.berkeley.edu\/~volkov\/volkov10-GTC.pdf."},{"key":"e_1_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/1165389.945471"},{"key":"e_1_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/502059.502057"},{"key":"e_1_2_1_51_1","volume-title":"Proceedings of the OpenFabrics Alliance International Developer Workshop.","author":"Woodruf Bob","year":"2013","unstructured":"Bob Woodruf. 2013. OFS Software for the Intel Xeon Phi. 
In Proceedings of the OpenFabrics Alliance International Developer Workshop."},{"key":"e_1_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/2884045.2884053"}],"container-title":["ACM Transactions on Computer Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2963098","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2963098","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2963098","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:33:36Z","timestamp":1763458416000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2963098"}},"subtitle":["Networking Abstractions for GPU Programs"],"short-title":[],"issued":{"date-parts":[[2016,9,17]]},"references-count":52,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2016,9,17]]}},"alternative-id":["10.1145\/2963098"],"URL":"https:\/\/doi.org\/10.1145\/2963098","relation":{},"ISSN":["0734-2071","1557-7333"],"issn-type":[{"value":"0734-2071","type":"print"},{"value":"1557-7333","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,9,17]]},"assertion":[{"value":"2016-01-01","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2016-06-01","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2016-09-17","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}