{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T08:02:40Z","timestamp":1764403360203,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":16,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589335.3648326","type":"proceedings-article","created":{"date-parts":[[2024,5,12]],"date-time":"2024-05-12T18:41:21Z","timestamp":1715539281000},"page":"284-291","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Cache-Aware Reinforcement Learning in Large-Scale Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1267-1680","authenticated-orcid":false,"given":"Xiaoshuang","family":"Chen","sequence":"first","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8709-3083","authenticated-orcid":false,"given":"Gengrui","family":"Zhang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4667-5523","authenticated-orcid":false,"given":"Yao","family":"Wang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9609-8653","authenticated-orcid":false,"given":"Yulin","family":"Wu","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-2775-9956","authenticated-orcid":false,"given":"Shuo","family":"Su","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1642-7840","authenticated-orcid":false,"given":"Kaiqiao","family":"Zhan","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1329-3876","authenticated-orcid":false,"given":"Ben","family":"Wang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Reinforcing User Retention in a Billion Scale Short Video Recommender System. arXiv preprint arXiv:2302.01724","author":"Cai Qingpeng","year":"2023","unstructured":"Qingpeng Cai, Shuchang Liu, Xueliang Wang, Tianyou Zuo, Wentao Xie, Bin Yang, Dong Zheng, Peng Jiang, and Kun Gai. 2023. Reinforcing User Retention in a Billion Scale Short Video Recommender System. arXiv preprint arXiv:2302.01724 (2023)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_2_3_1","volume-title":"International conference on machine learning. PMLR, 1587--1596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587--1596."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557220"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/WoWMoM.2018.8449731"},{"key":"e_1_3_2_2_6_1","volume-title":"DCAF: A Dynamic Computation Allocation Framework for Online Serving System. arXiv preprint arXiv:2006.09684","author":"Jiang Biye","year":"2020","unstructured":"Biye Jiang, Pengye Zhang, Rihan Chen, Xinchen Luo, Yin Yang, Guan Wang, Guorui Zhou, Xiaoqiang Zhu, and Kun Gai. 2020. DCAF: A Dynamic Computation Allocation Framework for Online Serving System. arXiv preprint arXiv:2006.09684 (2020)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_2_8_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap Timothy P","year":"2015","unstructured":"Timothy P Lillicrap, Jonathan J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098011"},{"volume-title":"Monte-Carlo simulation, and machine learning.","author":"Rubinstein Reuven Y","key":"e_1_3_2_2_10_1","unstructured":"Reuven Y Rubinstein and Dirk P Kroese. 2004. The cross-entropy method: a unified approach to combinatorial optimization, Monte-Carlo simulation, and machine learning. Vol. 133. Springer."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCOMM.2017.2705695"},{"key":"e_1_3_2_2_12_1","volume-title":"Cold: Towards the next generation of pre-ranking system. arXiv preprint arXiv:2007.16122","author":"Wang Zhe","year":"2020","unstructured":"Zhe Wang, Liqin Zhao, Biye Jiang, Guorui Zhou, Xiaoqiang Zhu, and Kun Gai. 2020. Cold: Towards the next generation of pre-ranking system. arXiv preprint arXiv:2007.16122 (2020)."},{"key":"e_1_3_2_2_13_1","volume-title":"ResAct: Reinforcing Long-term Engagement in Sequential Recommendation with Residual Actor. arXiv preprint arXiv:2206.02620","author":"Xue Wanqi","year":"2022","unstructured":"Wanqi Xue, Qingpeng Cai, Ruohan Zhan, Dong Zheng, Peng Jiang, and Bo An. 2022. ResAct: Reinforcing Long-term Engagement in Sequential Recommendation with Residual Actor. arXiv preprint arXiv:2206.02620 (2022)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539092"},{"key":"e_1_3_2_2_15_1","volume-title":"UNEX-RL: Reinforcing Long-Term Rewards in Multi-Stage Recommender Systems with UNidirectional EXecution. arXiv preprint arXiv:2401.06470","author":"Zhang Gengrui","year":"2024","unstructured":"Gengrui Zhang, Yao Wang, Xiaoshuang Chen, Hongyi Qian, Kaiqiao Zhan, and Ben Wang. 2024. UNEX-RL: Reinforcing Long-Term Rewards in Multi-Stage Recommender Systems with UNidirectional EXecution. arXiv preprint arXiv:2401.06470 (2024)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"}],"event":{"name":"WWW '24: The ACM Web Conference 2024","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Singapore Singapore","acronym":"WWW '24"},"container-title":["Companion Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589335.3648326","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589335.3648326","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:36:18Z","timestamp":1755822978000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589335.3648326"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":16,"alternative-id":["10.1145\/3589335.3648326","10.1145\/3589335"],"URL":"https:\/\/doi.org\/10.1145\/3589335.3648326","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}