{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:03:44Z","timestamp":1750309424279,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":29,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T00:00:00Z","timestamp":1728345600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,8]]},"DOI":"10.1145\/3640457.3688128","type":"proceedings-article","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T15:39:28Z","timestamp":1728401968000},"page":"670-679","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["RPAF: A Reinforcement Prediction-Allocation Framework for Cache Allocation in Large-Scale Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-2775-9956","authenticated-orcid":false,"given":"Shuo","family":"Su","sequence":"first","affiliation":[{"name":"Kuaishou Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1267-1680","authenticated-orcid":false,"given":"Xiaoshuang","family":"Chen","sequence":"additional","affiliation":[{"name":"Recommendation, Kuaishou Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4667-5523","authenticated-orcid":false,"given":"Yao","family":"Wang","sequence":"additional","affiliation":[{"name":"Recommender System, Kuaishou Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0661-6562","authenticated-orcid":false,"given":"Yulin","family":"Wu","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2040-6405","authenticated-orcid":false,"given":"Ziqiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1642-7840","authenticated-orcid":false,"given":"Kaiqiao","family":"Zhan","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1329-3876","authenticated-orcid":false,"given":"Ben","family":"Wang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3636-3618","authenticated-orcid":false,"given":"Kun","family":"Gai","sequence":"additional","affiliation":[{"name":"Unaffiliated, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,8]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543846"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583259"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013312"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220122"},{"key":"e_1_3_2_2_5_1","volume-title":"Cache-Aware Reinforcement Learning in Large-Scale Recommender Systems. arXiv preprint arXiv:2401.06470","author":"Chen Xiaoshuang","year":"2024","unstructured":"Xiaoshuang Chen, Gengrui Zhang, Yao Wang, Yulin Wu, Kaiqiao Zhan, and Ben Wang. 2024. Cache-Aware Reinforcement Learning in Large-Scale Recommender Systems. arXiv preprint arXiv:2401.06470 (2024)."},{"key":"e_1_3_2_2_6_1","first-page":"1","article-title":"Risk-constrained reinforcement learning with percentile risk criteria","volume":"18","author":"Chow Yinlam","year":"2018","unstructured":"Yinlam Chow, Mohammad Ghavamzadeh, Lucas Janson, and Marco Pavone. 2018. Risk-constrained reinforcement learning with percentile risk criteria. Journal of Machine Learning Research 18, 167 (2018), 1\u201351.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_7_1","volume-title":"Safe exploration in continuous action spaces. arXiv preprint arXiv:1801.08757","author":"Dalal Gal","year":"2018","unstructured":"Gal Dalal, Krishnamurthy Dvijotham, Matej Vecerik, Todd Hester, Cosmin Paduraru, and Yuval Tassa. 2018. Safe exploration in continuous action spaces. arXiv preprint arXiv:1801.08757 (2018)."},{"key":"e_1_3_2_2_8_1","volume-title":"Weakly Coupled Deep Q-Networks. Advances in Neural Information Processing Systems 36","author":"El\u00a0Shar Ibrahim","year":"2024","unstructured":"Ibrahim El\u00a0Shar and Daniel Jiang. 2024. Weakly Coupled Deep Q-Networks. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_2_9_1","volume-title":"International conference on machine learning. PMLR, 1587\u20131596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587\u20131596."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557220"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2886795"},{"key":"e_1_3_2_2_12_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861\u20131870."},{"key":"e_1_3_2_2_13_1","volume-title":"Dcaf: a dynamic computation allocation framework for online serving system. arXiv preprint arXiv:2006.09684","author":"Jiang Biye","year":"2020","unstructured":"Biye Jiang, Pengye Zhang, Rihan Chen, Xinchen Luo, Yin Yang, Guan Wang, Guorui Zhou, Xiaoqiang Zhu, and Kun Gai. 2020. Dcaf: a dynamic computation allocation framework for online serving system. arXiv preprint arXiv:2006.09684 (2020)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_2_15_1","volume-title":"Deep constrained q-learning. arXiv preprint arXiv:2003.09398","author":"Kalweit Gabriel","year":"2020","unstructured":"Gabriel Kalweit, Maria Huegle, Moritz Werling, and Joschka Boedecker. 2020. Deep constrained q-learning. arXiv preprint arXiv:2003.09398 (2020)."},{"key":"e_1_3_2_2_16_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap P","year":"2015","unstructured":"Timothy\u00a0P Lillicrap, Jonathan\u00a0J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219950"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098011"},{"volume-title":"The 30th international joint conference on artificial intelligence (ijcai).","author":"Liu Yongshuai","key":"e_1_3_2_2_19_1","unstructured":"Yongshuai Liu, Avishai Halev, and Xin Liu. 2021. Policy learning with constraints in model-free reinforcement learning: A survey. In The 30th international joint conference on artificial intelligence (ijcai)."},{"key":"e_1_3_2_2_20_1","volume-title":"GreenFlow: a computation allocation framework for building environmentally sound recommendation system. arXiv preprint arXiv:2312.16176","author":"Lu Xingyu","year":"2023","unstructured":"Xingyu Lu, Zhining Liu, Yanchu Guan, Hongxuan Zhang, Chenyi Zhuang, Wenqi Ma, Yize Tan, Jinjie Gu, and Guannan Zhang. 2023. GreenFlow: a computation allocation framework for building environmentally sound recommendation system. arXiv preprint arXiv:2312.16176 (2023)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380130"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412744"},{"volume-title":"Reinforcement learning: An introduction","author":"Sutton S","key":"e_1_3_2_2_23_1","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_2_24_1","volume-title":"Reward constrained policy optimization. arXiv preprint arXiv:1805.11074","author":"Tessler Chen","year":"2018","unstructured":"Chen Tessler, Daniel\u00a0J Mankowitz, and Shie Mannor. 2018. Reward constrained policy optimization. arXiv preprint arXiv:1805.11074 (2018)."},{"key":"e_1_3_2_2_25_1","volume-title":"Cold: Towards the next generation of pre-ranking system. arXiv preprint arXiv:2007.16122","author":"Wang Zhe","year":"2020","unstructured":"Zhe Wang, Liqin Zhao, Biye Jiang, Guorui Zhou, Xiaoqiang Zhu, and Kun Gai. 2020. Cold: Towards the next generation of pre-ranking system. arXiv preprint arXiv:2007.16122 (2020)."},{"key":"e_1_3_2_2_26_1","volume-title":"Computation resource allocation solution in recommender systems. arXiv preprint arXiv:2103.02259","author":"Yang Xun","year":"2021","unstructured":"Xun Yang, Yunli Wang, Cheng Chen, Qing Tan, Chuan Yu, Jian Xu, and Xiaoqiang Zhu. 2021. Computation resource allocation solution in recommender systems. arXiv preprint arXiv:2103.02259 (2021)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583313"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219826"}],"event":{"name":"RecSys '24: 18th ACM Conference on Recommender Systems","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Bari Italy","acronym":"RecSys '24"},"container-title":["18th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688128","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640457.3688128","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:32Z","timestamp":1750294712000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688128"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,8]]},"references-count":29,"alternative-id":["10.1145\/3640457.3688128","10.1145\/3640457"],"URL":"https:\/\/doi.org\/10.1145\/3640457.3688128","relation":{},"subject":[],"published":{"date-parts":[[2024,10,8]]},"assertion":[{"value":"2024-10-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}