{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T01:26:23Z","timestamp":1781054783274,"version":"3.54.1"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030049171","type":"print"},{"value":"9783030049188","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-04918-8_10","type":"book-chapter","created":{"date-parts":[[2019,3,18]],"date-time":"2019-03-18T06:05:44Z","timestamp":1552889144000},"page":"148-164","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Designing High-Performance In-Memory Key-Value Operations with Persistent GPU Kernels and OpenSHMEM"],"prefix":"10.1007","author":[{"given":"Ching-Hsiang","family":"Chu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sreeram","family":"Potluri","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anshuman","family":"Goswami","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Manjunath","family":"Gorentla Venkata","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Neena","family":"Imam","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chris J.","family":"Newburn","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2019,3,19]]},"reference":[{"key":"10_CR1","unstructured":"A fast GPU memory copy library based on NVIDIA GPUDirect RDMA technology. \n                      https:\/\/github.com\/NVIDIA\/gdrcopy\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR2","unstructured":"Mega-KV: A GPU-Based In-Memory Key-Value Store. \n                      http:\/\/kay21s.github.io\/megakv\/\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR3","unstructured":"MVAPICH: MPI over InfiniBand, Omni-Path, Ethernet\/iWARP, and RoCE. \n                      http:\/\/mvapich.cse.ohio-state.edu\/\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR4","unstructured":"NVIDIA CUDA. \n                      http:\/\/docs.nvidia.com\/cuda\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR5","unstructured":"NVIDIA GPUDirect. \n                      https:\/\/developer.nvidia.com\/gpudirect\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR6","unstructured":"OpenMPI: Open Source High Performance Computing. \n                      http:\/\/www.open-mpi.org\/\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR7","unstructured":"OpenSHMEM.org. \n                      http:\/\/www.openshmem.org\/site\/\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR8","unstructured":"Redis. \n                      https:\/\/redis.io\/\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR9","unstructured":"Top 500 Supercomputer sites. \n                      http:\/\/www.top500.org\/\n                      \n                    . Accessed 9 Sept 2018"},{"key":"10_CR10","unstructured":"Blott, M., Karras, K., Liu, L., Vissers, K., B\u00e4r, J., Istv\u00e1n, Z.: Achieving 10Gbps line-rate key-value stores with FPGAs. In: Presented as Part of the 5th USENIX Workshop on Hot Topics in Cloud Computing, San Jose, CA. USENIX (2013)"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Chu, C.H., Hamidouche, K., Venkatesh, A., Awan, A.A., Panda, D.K.: CUDA kernel based collective reduction operations on large-scale GPU clusters. In: 2016 16th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGrid), pp. 726\u2013735, May 2016","DOI":"10.1109\/CCGrid.2016.111"},{"key":"10_CR12","unstructured":"Dragojevi\u0107, A., Narayanan, D., Castro, M., Hodson, O.: FaRM: fast remote memory. In: 11th USENIX Symposium on Networked Systems Design and Implementation (NSDI 14), Seattle, WA, pp. 401\u2013414. USENIX Association (2014)"},{"issue":"124","key":"10_CR13","first-page":"5","volume":"2004","author":"B Fitzpatrick","year":"2004","unstructured":"Fitzpatrick, B.: Distributed caching with memcached. Linux J. 2004(124), 5 (2004)","journal-title":"Linux J."},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Fu, H., Venkata, M.G., Choudhury, A.R., Imam, N., Yu, W.: High-performance key-value store on OpenSHMEM. In: 2017 17th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID), pp. 559\u2013568, May 2017","DOI":"10.1109\/CCGRID.2017.49"},{"key":"10_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1007\/978-3-319-50995-2_9","volume-title":"OpenSHMEM and Related Technologies. Enhancing OpenSHMEM for Hybrid Environments","author":"H Fu","year":"2016","unstructured":"Fu, H., SinghaRoy, K., Venkata, M.G., Zhu, Y., Yu, W.: SHMemCache: enabling memcached on the OpenSHMEM global address model. In: Gorentla Venkata, M., Imam, N., Pophale, S., Mintz, T.M. (eds.) OpenSHMEM 2016. LNCS, vol. 10007, pp. 131\u2013145. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-50995-2_9"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Hamidouche, K., Venkatesh, A., Awan, A.A., Subramoni, H., Chu, C.H., Panda, D.K.: Exploiting GPUDirect RDMA in designing high performance OpenSHMEM for NVIDIA GPU clusters. In: 2015 IEEE International Conference on Cluster Computing, pp. 78\u201387, September 2015","DOI":"10.1109\/CLUSTER.2015.21"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Hetherington, T.H., Rogers, T.G., Hsu, L., O\u2019Connor, M., Aamodt, T.M.: Characterizing and evaluating a key-value store application on heterogeneous CPU-GPU systems. In: 2012 IEEE International Symposium on Performance Analysis of Systems Software, pp. 88\u201398, April 2012","DOI":"10.1109\/ISPASS.2012.6189209"},{"key":"10_CR18","doi-asserted-by":"crossref","unstructured":"Hetherington, T.H., O\u2019Connor, M., Aamodt, T.M.: MemcachedGPU: scaling-up scale-out key-value stores. In: Proceedings of the Sixth ACM Symposium on Cloud Computing. SoCC 2015, pp. 43\u201357. ACM, New York (2015)","DOI":"10.1145\/2806777.2806836"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Jin, X., et al.: NetCache: balancing key-value stores with fast in-network caching. In: Proceedings of the 26th Symposium on Operating Systems Principles, SOSP 2017, pp. 121\u2013136. ACM, New York (2017)","DOI":"10.1145\/3132747.3132764"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Kim, J., Lee, S., Vetter, J.S.: PapyrusKV: a high-performance parallel key-value store for distributed NVM architectures. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC 2017, pp. 57:1\u201357:14. ACM, New York (2017)","DOI":"10.1145\/3126908.3126943"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Li, B., et al.: KV-Direct: high-performance in-memory key-value store with programmable NIC. In: Proceedings of the 26th Symposium on Operating Systems Principles, SOSP 2017, pp. 137\u2013152. ACM, New York (2017)","DOI":"10.1145\/3132747.3132756"},{"key":"10_CR22","doi-asserted-by":"crossref","unstructured":"Li, S., et al.: Architecting to achieve a billion requests per second throughput on a single key-value store server platform. In: Proceedings of the 42nd Annual International Symposium on Computer Architecture, ISCA 2015, pp. 476\u2013488, ACM, New York (2015)","DOI":"10.1145\/2872887.2750416"},{"key":"10_CR23","unstructured":"Lim, H., Han, D., Andersen, D.G., Kaminsky, M.: MICA: a holistic approach to fast in-memory key-value storage. In: 11th USENIX Symposium on Networked Systems Design and Implementation (NSDI 14), Seattle, WA, pp. 429\u2013444. USENIX Association (2014)"},{"key":"10_CR24","first-page":"50","volume":"40","author":"X Lu","year":"2017","unstructured":"Lu, X., Shankar, D., Panda, D.K.: Scalable and distributed key-value store-based data management using RDMA-memcached. IEEE Data Eng. Bull. 40, 50\u201361 (2017)","journal-title":"IEEE Data Eng. Bull."},{"key":"10_CR25","unstructured":"Mitchell, C., Geng, Y., Li, J.: Using one-sided RDMA reads to build a fast, CPU-efficient key-value store. In: Presented as Part of the 2013 USENIX Annual Technical Conference (USENIX ATC 13), San Jose, CA, pp. 103\u2013114. USENIX (2013)"},{"key":"10_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-319-73814-7_1","volume-title":"OpenSHMEM and Related Technologies. Big Compute and Big Data Convergence","author":"N Namashivayam","year":"2018","unstructured":"Namashivayam, N., et al.: Symmetric memory partitions in OpenSHMEM: a case study with Intel KNL. In: Gorentla Venkata, M., Imam, N., Pophale, S. (eds.) OpenSHMEM 2017. LNCS, vol. 10679, pp. 3\u201318. Springer, Cham (2018). \n                      https:\/\/doi.org\/10.1007\/978-3-319-73814-7_1"},{"key":"10_CR27","doi-asserted-by":"crossref","unstructured":"Potluri, S., Bureddy, D., Wang, H., Subramoni, H., Panda, D.K.: Extending OpenSHMEM for GPU computing. In: 2013 IEEE 27th International Symposium on Parallel and Distributed Processing, pp. 1001\u20131012, May 2013","DOI":"10.1109\/IPDPS.2013.104"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Potluri, S., Goswami, A., Rossetti, D., Newburn, C.J., Venkata, M.G., Imam, N.: GPU-Centric communication on NVIDIA GPU clusters with InfiniBand: a case study with OpenSHMEM. In: 2017 IEEE 24th International Conference on High Performance Computing (HiPC), pp. 253\u2013262, December 2017","DOI":"10.1109\/HiPC.2017.00037"},{"key":"10_CR29","doi-asserted-by":"crossref","unstructured":"Potluri, S., Hamidouche, K., Venkatesh, A., Bureddy, D., Panda, D.: Efficient Inter-node MPI communication using GPUDirect RDMA for infiniband clusters with NVIDIA GPUs. In: 2013 42nd International Conference on Parallel Processing (ICPP), pp. 80\u201389, October 2013","DOI":"10.1109\/ICPP.2013.17"},{"key":"10_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1007\/978-3-319-73814-7_6","volume-title":"OpenSHMEM and Related Technologies. Big Compute and Big Data Convergence","author":"S Potluri","year":"2018","unstructured":"Potluri, S., Goswami, A., Venkata, M.G., Imam, N.: Efficient breadth first search on multi-GPU systems using GPU-centric OpenSHMEM. In: Gorentla Venkata, M., Imam, N., Pophale, S. (eds.) OpenSHMEM 2017. LNCS, vol. 10679, pp. 82\u201396. Springer, Cham (2018). \n                      https:\/\/doi.org\/10.1007\/978-3-319-73814-7_6"},{"issue":"10","key":"10_CR31","doi-asserted-by":"publisher","first-page":"2595","DOI":"10.1109\/TPDS.2013.222","volume":"25","author":"H Wang","year":"2014","unstructured":"Wang, H., Potluri, S., Bureddy, D., Rosales, C., Panda, D.K.: GPU-aware MPI on RDMA-enabled clusters: design, implementation and evaluation. IEEE Trans. Parallel Distrib. Syst. 25(10), 2595\u20132605 (2014). Oct","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"10_CR32","first-page":"3:1","volume":"35","author":"X Wei","year":"2015","unstructured":"Wei, X., Shi, J., Chen, Y., Chen, R., Chen, H.: Fast in-memory transaction processing using RDMA and HTM. ACM Trans. Comput. Syst. 35, 3:1\u20133:37 (2015)","journal-title":"ACM Trans. Comput. Syst."},{"issue":"11","key":"10_CR33","doi-asserted-by":"publisher","first-page":"1226","DOI":"10.14778\/2809974.2809984","volume":"8","author":"K Zhang","year":"2015","unstructured":"Zhang, K., Wang, K., Yuan, Y., Guo, L., Lee, R., Zhang, X.: Mega-KV: a case for GPUs to maximize the throughput of in-memory key-value stores. Proc. VLDB Endow. 8(11), 1226\u20131237 (2015). Jul","journal-title":"Proc. VLDB Endow."}],"container-title":["Lecture Notes in Computer Science","OpenSHMEM and Related Technologies. OpenSHMEM in the Era of Extreme Heterogeneity"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-04918-8_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,20]],"date-time":"2019-05-20T05:16:36Z","timestamp":1558329396000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-04918-8_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030049171","9783030049188"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-04918-8_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"19 March 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"OpenSHMEM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop on OpenSHMEM and Related Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Baltimore, MD","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 August 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"openshmem2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.csm.ornl.gov\/workshops\/openshmem2018\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}