{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T09:58:55Z","timestamp":1775815135826,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62102110"],"award-info":[{"award-number":["62102110"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539058","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:41Z","timestamp":1660331201000},"page":"3388-3397","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Lion: A GPU-Accelerated Online Serving System for Web-Scale Recommendation at Baidu"],"prefix":"10.1145","author":[{"given":"Hao","family":"Liu","sequence":"first","affiliation":[{"name":"HKUST(GZ), HKUST, Guangzhou, China"}]},{"given":"Qian","family":"Gao","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}]},{"given":"Xiaochao","family":"Liao","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}]},{"given":"Guangxing","family":"Chen","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}]},{"given":"Hao","family":"Xiong","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}]},{"given":"Silin","family":"Ren","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}]},{"given":"Guobao","family":"Yang","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}]},{"given":"Zhiwei","family":"Zha","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2021. NVIDIA CUDA Toolkit v11.6.0. https:\/\/docs.nvidia.com\/cuda\/"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.09.012"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2330163.2330207"},{"key":"e_1_3_2_2_4_1","volume-title":"PipeSwitch: Fast Pipelined Context Switching for Deep Learning Applications. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20)","author":"Bai Zhihao","year":"2020","unstructured":"Zhihao Bai, Zhen Zhang, Yibo Zhu, and Xin Jin. 2020. PipeSwitch: Fast Pipelined Context Switching for Deep Learning Applications. In 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20). 499--514."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSPEC.2019.8701189"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330651"},{"key":"e_1_3_2_2_8_1","volume-title":"Intel AVX: New frontiers in performance improvements and energy efficiency. Intel white paper 19, 20","author":"Firasta Nadeem","year":"2008","unstructured":"Nadeem Firasta, Mark Buxton, Paula Jinbo, Kaveh Nasri, and Shihjong Kuo. 2008. Intel AVX: New frontiers in performance improvements and energy efficiency. Intel white paper 19, 20 (2008)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00012"},{"key":"e_1_3_2_2_10_1","volume-title":"2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). 982--995","author":"Gupta Udit","year":"2020","unstructured":"Udit Gupta, Samuel Hsia, Vikram Saraph, Xiaodong Wang, Brandon Reagen, Gu-Yeon Wei, Hsien-Hsin S Lee, David Brooks, and Carole-Jean Wu. 2020. Deep-RecSys: A system for optimizing end-to-end at-scale neural recommendation inference. In 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA). 982--995."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480127"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-28869-2_16"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3038912.3052569"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87779-0_24"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/3488766.3488792"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470421"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480051"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00070"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2640087.2644155"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467146"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2022.3153711"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.2985954"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTAS48715.2020.000-5"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.14"},{"key":"e_1_3_2_2_26_1","unstructured":"Han Vanholder. 2016. Efficient inference with tensorrt."},{"key":"e_1_3_2_2_27_1","volume-title":"Deep learning for computer vision: A brief review. Computational intelligence and neuroscience 2018","author":"Voulodimos Athanasios","year":"2018","unstructured":"Athanasios Voulodimos, Nikolaos Doulamis, Anastasios Doulamis, and Eftychios Protopapadakis. 2018. Deep learning for computer vision: A brief review. Computational intelligence and neuroscience 2018 (2018)."},{"key":"e_1_3_2_2_28_1","first-page":"513","article-title":"DLAU: A scalable deep learning accelerator unit on FPGA","volume":"36","author":"Wang Chao","year":"2016","unstructured":"Chao Wang, Lei Gong, Qi Yu, Xi Li, Yuan Xie, and Xuehai Zhou. 2016. DLAU: A scalable deep learning accelerator unit on FPGA. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems 36, 3 (2016), 513--517.","journal-title":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCI.2018.2840738"},{"key":"e_1_3_2_2_31_1","volume-title":"2017 USENIX Annual Technical Conference (ATC). 181--193","author":"Zhang Hao","year":"2017","unstructured":"Hao Zhang, Zeyu Zheng, Shizhen Xu, Wei Dai, Qirong Ho, Xiaodan Liang, Zhiting Hu, Jinliang Wei, Pengtao Xie, and Eric P Xing. 2017. Poseidon: An efficient communication architecture for distributed deep learning on GPU clusters. In 2017 USENIX Annual Technical Conference (ATC). 181--193."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358045"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Washington DC USA","acronym":"KDD '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539058","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539058","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:09:50Z","timestamp":1750183790000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539058"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":34,"alternative-id":["10.1145\/3534678.3539058","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539058","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}