{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T21:40:50Z","timestamp":1742938850030,"version":"3.40.3"},"publisher-location":"Cham","reference-count":17,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031306365"},{"type":"electronic","value":"9783031306372"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-30637-2_36","type":"book-chapter","created":{"date-parts":[[2023,4,13]],"date-time":"2023-04-13T10:08:13Z","timestamp":1681380493000},"page":"546-561","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Accelerating Recommendation Inference via\u00a0GPU Streams"],"prefix":"10.1007","author":[{"given":"Yuean","family":"Niu","sequence":"first","affiliation":[]},{"given":"Zhizhen","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Chen","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Jiaqiang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,4,14]]},"reference":[{"issue":"1","key":"36_CR1","doi-asserted-by":"publisher","first-page":"127","DOI":"10.14778\/3485450.3485462","volume":"15","author":"M Adnan","year":"2021","unstructured":"Adnan, M., Maboud, Y.E., Mahajan, D., Nair, P.J.: Accelerating recommendation system training by leveraging popular choices. Proc. VLDB Endow. (PVLDB) 15(1), 127\u2013140 (2021)","journal-title":"Proc. VLDB Endow. (PVLDB)"},{"doi-asserted-by":"crossref","unstructured":"Bian, W., et al.: Can: feature co-action network for click-through rate prediction. In: Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining (WSDM), pp. 57\u201365 (2022)","key":"36_CR2","DOI":"10.1145\/3488560.3498435"},{"doi-asserted-by":"crossref","unstructured":"Chen, Q., Zhao, H., Li, W., Huang, P., Ou, W.: Behavior sequence transformer for e-commerce recommendation in alibaba. In: Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD), pp. 1\u20134 (2019)","key":"36_CR3","DOI":"10.1145\/3326937.3341261"},{"unstructured":"Crankshaw, D., Wang, X., Zhou, G., Franklin, M.J., Gonzalez, J.E., Stoica, I.: Clipper: a low-latency online prediction serving system. In: Proceedings of the 14th USENIX Symposium on Networked Systems Design and Implementation (NSDI), pp. 613\u2013627 (2017)","key":"36_CR4"},{"doi-asserted-by":"crossref","unstructured":"Cui, W., Zhao, H., Chen, Q., Zheng, N., et al.: Enable simultaneous DNN services based on deterministic operator overlap and precise latency prediction. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC), pp. 15:1\u201315:15 (2021)","key":"36_CR5","DOI":"10.1145\/3458817.3476143"},{"doi-asserted-by":"crossref","unstructured":"Dhakal, A., Kulkarni, S.G., Ramakrishnan, K.K.: GSLICE: controlled spatial sharing of GPUs for a scalable inference platform. In: Proceedings of the ACM Symposium on Cloud Computing (SoCC), pp. 492\u2013506 (2020)","key":"36_CR6","DOI":"10.1145\/3419111.3421284"},{"doi-asserted-by":"crossref","unstructured":"Gupta, U., et al.: DeepRecSys: a system for optimizing end-to-end at-scale neural recommendation inference. In: Proceedings of the 47th ACM\/IEEE Annual International Symposium on Computer Architecture (ISCA), pp. 982\u2013995 (2020)","key":"36_CR7","DOI":"10.1109\/ISCA45697.2020.00084"},{"doi-asserted-by":"crossref","unstructured":"Gupta, U., Wu, C., Wang, X., Naumov, M., et al.: The architectural implications of Facebook\u2019s DNN-based personalized recommendation. In: Proceedings of the IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 488\u2013501 (2020)","key":"36_CR8","DOI":"10.1109\/HPCA47549.2020.00047"},{"doi-asserted-by":"crossref","unstructured":"Hu, Y., Rallapalli, S., Ko, B., Govindan, R.: Olympian: scheduling GPU usage in a deep neural network model serving system. In: Proceedings of the 19th International Middleware Conference (Middleware), pp. 53\u201365 (2018)","key":"36_CR9","DOI":"10.1145\/3274808.3274813"},{"doi-asserted-by":"crossref","unstructured":"Ke, L., Gupta, U., Hempstead, M., Wu, C., Lee, H.S., Zhang, X.: Hercules: heterogeneity-aware inference serving for at-scale personalized recommendation. In: Proceedings of the IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 141\u2013144 (2022)","key":"36_CR10","DOI":"10.1109\/HPCA53966.2022.00019"},{"unstructured":"Kwon, W., Yu, G., Jeong, E., Chun, B.: Nimble: lightweight and parallel GPU task scheduling for deep learning. In: Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems (NeurIPS) (2020)","key":"36_CR11"},{"doi-asserted-by":"crossref","unstructured":"Liu, H., et al.: JIZHI: a fast and cost-effective model-as-a-service system for web-scale online inference at Baidu. In: Proceedings of the 27th ACM Conference on Knowledge Discovery and Data Mining (SIGKDD), pp. 3289\u20133298 (2021)","key":"36_CR12","DOI":"10.1145\/3447548.3467146"},{"issue":"2","key":"36_CR13","doi-asserted-by":"publisher","first-page":"312","DOI":"10.14778\/3489496.3489511","volume":"15","author":"X Miao","year":"2021","unstructured":"Miao, X., et al.: HET: scaling out huge embedding model training via cache-enabled distributed framework. Proc. VLDB Endow. (PVLDB) 15(2), 312\u2013320 (2021)","journal-title":"Proc. VLDB Endow. (PVLDB)"},{"unstructured":"Olston, C., et al.: Tensorflow-serving: flexible, high-performance ML serving. CoRR abs\/1712.06139 (2017)","key":"36_CR14"},{"doi-asserted-by":"crossref","unstructured":"Pi, Q., et al.: Search-based user interest modeling with lifelong sequential behavior data for click-through rate prediction. In: Proceedings of the 29th ACM International Conference on Information & Knowledge Management (CIKM), pp. 2685\u20132692 (2020)","key":"36_CR15","DOI":"10.1145\/3340531.3412744"},{"doi-asserted-by":"crossref","unstructured":"Zhang, Y., Chen, L., Yang, S., Yuan, M., et al.: PICASSO: unleashing the potential of GPU-centric training for wide-and-deep recommender systems. In: Proceedings of the 38th IEEE International Conference on Data Engineering (ICDE), pp. 3453\u20133466 (2022)","key":"36_CR16","DOI":"10.1109\/ICDE53745.2022.00324"},{"doi-asserted-by":"crossref","unstructured":"Zhou, G., et al.: Deep interest evolution network for click-through rate prediction. In: Proceedings of the Thirty-Third AAAI Conference on Artificial Intelligence (AAAI), pp. 5941\u20135948 (2019)","key":"36_CR17","DOI":"10.1609\/aaai.v33i01.33015941"}],"container-title":["Lecture Notes in Computer Science","Database Systems for Advanced Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-30637-2_36","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T17:16:03Z","timestamp":1710263763000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-30637-2_36"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031306365","9783031306372"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-30637-2_36","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"14 April 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DASFAA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Database Systems for Advanced Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 April 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 April 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dasfaa2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.tjudb.cn\/dasfaa2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"652","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"125","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7.3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}