{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T09:59:25Z","timestamp":1775815165075,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,14]],"date-time":"2021-08-14T00:00:00Z","timestamp":1628899200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8,14]]},"DOI":"10.1145\/3447548.3467146","type":"proceedings-article","created":{"date-parts":[[2021,8,12]],"date-time":"2021-08-12T06:12:09Z","timestamp":1628748729000},"page":"3289-3298","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":22,"title":["JIZHI: A Fast and Cost-Effective Model-As-A-Service System for Web-Scale Online Inference at Baidu"],"prefix":"10.1145","author":[{"given":"Hao","family":"Liu","sequence":"first","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qian","family":"Gao","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiang","family":"Li","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaochao","family":"Liao","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Xiong","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangxing","family":"Chen","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenlin","family":"Wang","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guobao","family":"Yang","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiwei","family":"Zha","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daxiang","family":"Dong","sequence":"additional","affiliation":[{"name":"Baidu Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dejing","family":"Dou","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoyi","family":"Xiong","sequence":"additional","affiliation":[{"name":"Baidu, Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2019.09.012"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2330163.2330207"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Brian Babcock Mayur Datar and Rajeev Motwani. 2007. Load shedding in data stream systems. In Data Streams. 127--147.  Brian Babcock Mayur Datar and Rajeev Motwani. 2007. Load shedding in data stream systems. In Data Streams. 127--147.","DOI":"10.1007\/978-0-387-47534-9_7"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330744"},{"key":"e_1_3_2_2_5_1","unstructured":"Jake Brutlag. 2009. Speed Matters for Google Web Search. https:\/\/services.google.com\/fh\/files\/blogs\/google_delayexp.pdf  Jake Brutlag. 2009. Speed Matters for Google Web Search. https:\/\/services.google.com\/fh\/files\/blogs\/google_delayexp.pdf"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/0022-0000(79)90044-8"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_2_8_1","volume-title":"A survey of model compression and acceleration for deep neural networks. arXiv preprint arXiv:1710.09282","author":"Cheng Yu","year":"2017","unstructured":"Yu Cheng , Duo Wang , Pan Zhou , and Tao Zhang . 2017. A survey of model compression and acceleration for deep neural networks. arXiv preprint arXiv:1710.09282 ( 2017 ). Yu Cheng, Duo Wang, Pan Zhou, and Tao Zhang. 2017. A survey of model compression and acceleration for deep neural networks. arXiv preprint arXiv:1710.09282 (2017)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330651"},{"key":"e_1_3_2_2_10_1","volume-title":"Intel AVX: New frontiers in performance improvements and energy efficiency. Intel white paper","author":"Firasta Nadeem","year":"2008","unstructured":"Nadeem Firasta , Mark Buxton , Paula Jinbo , Kaveh Nasri , and Shihjong Kuo . 2008. Intel AVX: New frontiers in performance improvements and energy efficiency. Intel white paper , Vol. 19 , 20 ( 2008 ). Nadeem Firasta, Mark Buxton, Paula Jinbo, Kaveh Nasri, and Shihjong Kuo. 2008. Intel AVX: New frontiers in performance improvements and energy efficiency. Intel white paper, Vol. 19, 20 (2008)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00012"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406709"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330670"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3038912.3052569"},{"key":"e_1_3_2_2_15_1","volume-title":"Session-based recommendations with recurrent neural networks. arXiv preprint arXiv:1511.06939","author":"Hidasi Bal\u00e1zs","year":"2015","unstructured":"Bal\u00e1zs Hidasi , Alexandros Karatzoglou , Linas Baltrunas , and Domonkos Tikk . 2015. Session-based recommendations with recurrent neural networks. arXiv preprint arXiv:1511.06939 ( 2015 ). Bal\u00e1zs Hidasi, Alexandros Karatzoglou, Linas Baltrunas, and Domonkos Tikk. 2015. Session-based recommendations with recurrent neural networks. arXiv preprint arXiv:1511.06939 (2015)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219843"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330973"},{"key":"e_1_3_2_2_19_1","volume-title":"Deep learning. nature","author":"LeCun Yann","year":"2015","unstructured":"Yann LeCun , Yoshua Bengio , and Geoffrey Hinton . 2015. Deep learning. nature , Vol. 521 , 7553 ( 2015 ), 436--444. Yann LeCun, Yoshua Bengio, and Geoffrey Hinton. 2015. Deep learning. nature, Vol. 521, 7553 (2015), 436--444."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/301453.301487"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.14778\/3430915.3430924"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.2985954"},{"key":"e_1_3_2_2_23_1","volume-title":"Acoustic modeling using deep belief networks","author":"Dahl George E","year":"2011","unstructured":"Abdel-rahman Mohamed, George E Dahl , and Geoffrey Hinton . 2011. Acoustic modeling using deep belief networks . IEEE transactions on audio, speech, and language processing, Vol. 20 , 1 ( 2011 ), 14--22. Abdel-rahman Mohamed, George E Dahl, and Geoffrey Hinton. 2011. Acoustic modeling using deep belief networks. IEEE transactions on audio, speech, and language processing, Vol. 20, 1 (2011), 14--22."},{"key":"e_1_3_2_2_24_1","volume-title":"International Conference on Machine Learning .","author":"Ngiam Jiquan","year":"2011","unstructured":"Jiquan Ngiam , Aditya Khosla , Mingyu Kim , Juhan Nam , Honglak Lee , and Andrew Y Ng . 2011 . Multimodal deep learning . In International Conference on Machine Learning . Jiquan Ngiam, Aditya Khosla, Mingyu Kim, Juhan Nam, Honglak Lee, and Andrew Y Ng. 2011. Multimodal deep learning. In International Conference on Machine Learning ."},{"key":"e_1_3_2_2_25_1","volume-title":"High-Performance ML Serving. In Workshop on ML Systems at NIPS 2017 .","author":"Olston Christopher","year":"2017","unstructured":"Christopher Olston , Fangwei Li , Jeremiah Harmsen , Jordan Soyke , Kiril Gorovoy , Li Lao , Noah Fiedel , Sukriti Ramesh , and Vinu Rajashekhar . 2017 . TensorFlow-Serving: Flexible , High-Performance ML Serving. In Workshop on ML Systems at NIPS 2017 . Christopher Olston, Fangwei Li, Jeremiah Harmsen, Jordan Soyke, Kiril Gorovoy, Li Lao, Noah Fiedel, Sukriti Ramesh, and Vinu Rajashekhar. 2017. TensorFlow-Serving: Flexible, High-Performance ML Serving. In Workshop on ML Systems at NIPS 2017 ."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2014.7478821"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/HCS49909.2020.9220641"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2008.917757"},{"key":"e_1_3_2_2_29_1","unstructured":"Jongsoo Park Maxim Naumov Protonu Basu Summer Deng Aravind Kalaiah Daya Khudia James Law Parth Malani Andrey Malevich Satish Nadathur etal 2018. Deep learning inference in facebook data centers: Characterization performance optimizations and hardware implications. arXiv preprint arXiv:1811.09886 (2018).  Jongsoo Park Maxim Naumov Protonu Basu Summer Deng Aravind Kalaiah Daya Khudia James Law Parth Malani Andrey Malevich Satish Nadathur et al. 2018. Deep learning inference in facebook data centers: Characterization performance optimizations and hardware implications. arXiv preprint arXiv:1811.09886 (2018)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3376927"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.5555\/1315451.1315479"},{"key":"e_1_3_2_2_32_1","unstructured":"Han Vanholder. 2016. Efficient inference with TensorRT.  Han Vanholder. 2016. Efficient inference with TensorRT."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330782"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638950"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358045"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"KDD '21: The 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Virtual Event Singapore","acronym":"KDD '21","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3447548.3467146","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3447548.3467146","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:27Z","timestamp":1750191507000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3447548.3467146"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,14]]},"references-count":37,"alternative-id":["10.1145\/3447548.3467146","10.1145\/3447548"],"URL":"https:\/\/doi.org\/10.1145\/3447548.3467146","relation":{},"subject":[],"published":{"date-parts":[[2021,8,14]]},"assertion":[{"value":"2021-08-14","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}