{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T18:34:59Z","timestamp":1773772499389,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,9,13]],"date-time":"2022-09-13T00:00:00Z","timestamp":1663027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,9,18]]},"DOI":"10.1145\/3523227.3547405","type":"proceedings-article","created":{"date-parts":[[2022,9,13]],"date-time":"2022-09-13T14:13:46Z","timestamp":1663078426000},"page":"534-537","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":26,"title":["Merlin HugeCTR: GPU-accelerated Recommender System Training and Inference"],"prefix":"10.1145","author":[{"given":"Zehuan","family":"Wang","sequence":"first","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Yingcan","family":"Wei","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Minseok","family":"Lee","sequence":"additional","affiliation":[{"name":"NVIDIA, Korea, Republic of"}]},{"given":"Matthias","family":"Langer","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Fan","family":"Yu","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Jie","family":"Liu","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Shijie","family":"Liu","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Daniel G.","family":"Abel","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Xu","family":"Guo","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Jianbing","family":"Dong","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Ji","family":"Shi","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]},{"given":"Kunlun","family":"Li","sequence":"additional","affiliation":[{"name":"NVIDIA, China"}]}],"member":"320","published-online":{"date-parts":[[2022,9,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"TensorFlow: A System for Large-Scale Machine Learning. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, Manjunath Kudlur, Josh Levenberg, Rajat Monga, Sherry Moore, Derek\u00a0G. Murray, Benoit Steiner, Paul Tucker, Vijay Vasudevan, Pete Warden, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. 2016. TensorFlow: A System for Large-Scale Machine Learning. In 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16). USENIX Association, Savannah, GA, USA, 265\u2013283. https:\/\/www.usenix.org\/conference\/osdi16\/technical-sessions\/presentation\/abadi"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3320060"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_2_4_1","volume-title":"Clipper: A Low-Latency Online Prediction Serving System. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17)","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael\u00a0J. Franklin, Joseph\u00a0E. Gonzalez, and Ion Stoica. 2017. Clipper: A Low-Latency Online Prediction Serving System. In 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI 17). USENIX Association, Boston, MA, 613\u2013627. https:\/\/www.usenix.org\/conference\/nsdi17\/technical-sessions\/presentation\/crankshaw"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2901318.2901323"},{"key":"e_1_3_2_2_6_1","unstructured":"David Goodwin 2021. Triton Inference Server. https:\/\/github.com\/triton-inference-server\/server\/blob\/main\/docs\/model_management.md#model-control-mode-explicit. Accessed: 2022-05-15."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462976"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/3172077.3172127"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00084"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467080"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3177732.3177734"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1093\/nsr"},{"key":"e_1_3_2_2_13_1","unstructured":"David Kanter Peter Mattson 2021. ML \u00b7 Commons \/ MLperf v1.1 Results. https:\/\/mlcommons.org\/en\/training-normal-11. Accessed: 2022-03-15."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3003307"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3267809.3267840"},{"key":"e_1_3_2_2_16_1","unstructured":"Maxim Naumov Dheevatsa Mudigere Hao-Jun\u00a0Michael Shi Jianyu Huang Narayanan Sundaraman Jongsoo Park Xiaodong Wang Udit Gupta Carole-Jean Wu Alisson\u00a0G. Azzolini Dmytro Dzhulgakov Andrey Mallevich Ilia Cherniavskii Yinghai Lu Raghuraman Krishnamoorthi Ansha Yu Volodymyr Kondratenko Stephanie Pereira Xianjie Chen Wenlin Chen Vijay Rao Bill Jia Liang Xiong and Misha Smelyanskiy. 2019. Deep Learning Recommendation Model for Personalization and Recommendation Systems. CoRR abs\/1906.00091(2019) 10\u00a0pages. http:\/\/arxiv.org\/abs\/1906.00091"},{"key":"e_1_3_2_2_17_1","unstructured":"NVIDIA. 2016. The NVIDIA Collective Communications Library (NCCL). https:\/\/developer.nvidia.com\/nccl. Accessed: 2022-04-15."},{"key":"e_1_3_2_2_18_1","volume-title":"Merlin: A Framework for Building High-Performance, Deep learning-based Recommender Systems. https:\/\/developer.nvidia.com\/nvidia- merlin. Accessed: 2022-05-15.","author":"NVIDIA.","year":"2020","unstructured":"NVIDIA. 2020. Merlin: A Framework for Building High-Performance, Deep learning-based Recommender Systems. https:\/\/developer.nvidia.com\/nvidia- merlin. Accessed: 2022-05-15."},{"key":"e_1_3_2_2_19_1","unstructured":"Redis open\u00a0source project. 2021. Redis Cluster. https:\/\/redis.io\/docs\/manual\/scaling#creating-and-using-a-redis-cluster. Accessed: 2022-05-15."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3455008"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","unstructured":"Matthias\u00a0J. Sax. 2018. Apache Kafka. Springer International Publishing Cham 1\u20138. https:\/\/doi.org\/10.1007\/978-3-319-63962-8_196-1","DOI":"10.1007\/978-3-319-63962-8_196-1"},{"key":"e_1_3_2_2_22_1","volume-title":"Meet Horovod: Uber\u2019s Open Source Distributed Deep Learning Framework for TensorFlow. https:\/\/eng.uber.com\/horovod. Accessed: 2022-05-15","author":"Sergeev Alex","year":"2017","unstructured":"Alex Sergeev and Mike Del\u00a0Balso. 2017. Meet Horovod: Uber\u2019s Open Source Distributed Deep Learning Framework for TensorFlow. https:\/\/eng.uber.com\/horovod. Accessed: 2022-05-15."},{"key":"e_1_3_2_2_23_1","unstructured":"The Linux Foundation. 2019. Open Neural Network Exchange. https:\/\/onnx.ai. Accessed: 2022-05-15."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3124749.3124754"},{"key":"e_1_3_2_2_25_1","first-page":"412","article-title":"Distributed Hierarchical GPU Parameter Server for Massive Scale Deep Learning Ads Systems","volume":"2","author":"Zhao Weijie","year":"2020","unstructured":"Weijie Zhao, Deping Xie, Ronglai Jia, Yulei Qian, Ruiquan Ding, Mingming Sun, and Ping Li. 2020. Distributed Hierarchical GPU Parameter Server for Massive Scale Deep Learning Ads Systems. Proceedings of Machine Learning and Systems 2 (2020), 412\u2013428. https:\/\/proceedings.mlsys.org\/paper\/2020\/file\/f7e6c85504ce6e82442c770f7c8606f0-Paper.pdf","journal-title":"Proceedings of Machine Learning and Systems"}],"event":{"name":"RecSys '22: Sixteenth ACM Conference on Recommender Systems","location":"Seattle WA USA","acronym":"RecSys '22","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 16th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3523227.3547405","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3523227.3547405","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:03:01Z","timestamp":1750186981000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3523227.3547405"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,13]]},"references-count":25,"alternative-id":["10.1145\/3523227.3547405","10.1145\/3523227"],"URL":"https:\/\/doi.org\/10.1145\/3523227.3547405","relation":{},"subject":[],"published":{"date-parts":[[2022,9,13]]},"assertion":[{"value":"2022-09-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}