{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T15:56:46Z","timestamp":1759939006976,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Meituan"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583313","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:25Z","timestamp":1682551825000},"page":"3214-3224","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["RL-MPCA: A Reinforcement Learning Based Multi-Phase Computation Allocation Approach for Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1319-2369","authenticated-orcid":false,"given":"Jiahong","family":"Zhou","sequence":"first","affiliation":[{"name":"Meituan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5429-3185","authenticated-orcid":false,"given":"Shunhui","family":"Mao","sequence":"additional","affiliation":[{"name":"Meituan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2286-3240","authenticated-orcid":false,"given":"Guoliang","family":"Yang","sequence":"additional","affiliation":[{"name":"Meituan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7129-0250","authenticated-orcid":false,"given":"Bo","family":"Tang","sequence":"additional","affiliation":[{"name":"Meituan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5400-7924","authenticated-orcid":false,"given":"Qianlong","family":"Xie","sequence":"additional","affiliation":[{"name":"Meituan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4809-4395","authenticated-orcid":false,"given":"Lebin","family":"Lin","sequence":"additional","affiliation":[{"name":"Meituan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5495-0827","authenticated-orcid":false,"given":"Xingxing","family":"Wang","sequence":"additional","affiliation":[{"name":"Meituan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1964-3984","authenticated-orcid":false,"given":"Dong","family":"Wang","sequence":"additional","affiliation":[{"name":"Meituan, China"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International conference on machine learning. PMLR, 22\u201331","author":"Achiam Joshua","year":"2017","unstructured":"Joshua Achiam, David Held, Aviv Tamar, and Pieter Abbeel. 2017. Constrained policy optimization. In International conference on machine learning. PMLR, 22\u201331."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.1070.0445"},{"key":"e_1_3_2_1_3_1","volume-title":"International Conference on Machine Learning. PMLR, 104\u2013114","author":"Agarwal Rishabh","year":"2020","unstructured":"Rishabh Agarwal, Dale Schuurmans, and Mohammad Norouzi. 2020. An optimistic perspective on offline reinforcement learning. In International Conference on Machine Learning. PMLR, 104\u2013114."},{"volume-title":"Constrained Markov decision processes","author":"Altman Eitan","key":"e_1_3_2_1_4_1","unstructured":"Eitan Altman. 1999. Constrained Markov decision processes. Routledge."},{"key":"e_1_3_2_1_5_1","volume-title":"PID control system analysis, design, and technology","author":"Ang Kiam\u00a0Heong","year":"2005","unstructured":"Kiam\u00a0Heong Ang, Gregory Chong, and Yun Li. 2005. PID control system analysis, design, and technology. IEEE transactions on control systems technology 13, 4 (2005), 559\u2013576."},{"key":"e_1_3_2_1_6_1","volume-title":"Random search for hyper-parameter optimization.Journal of machine learning research 13, 2","author":"Bergstra James","year":"2012","unstructured":"James Bergstra and Yoshua Bengio. 2012. Random search for hyper-parameter optimization.Journal of machine learning research 13, 2 (2012)."},{"key":"e_1_3_2_1_7_1","unstructured":"Craig Boutilier and Tyler Lu. 2016. Budget allocation using weakly coupled constrained Markov decision processes. (2016)."},{"key":"e_1_3_2_1_8_1","volume-title":"Apache flink: Stream and batch processing in a single engine. Bulletin of the IEEE Computer Society Technical Committee on Data Engineering 36, 4","author":"Carbone Paris","year":"2015","unstructured":"Paris Carbone, Asterios Katsifodimos, Stephan Ewen, Volker Markl, Seif Haridi, and Kostas Tzoumas. 2015. Apache flink: Stream and batch processing in a single engine. Bulletin of the IEEE Computer Society Technical Committee on Data Engineering 36, 4 (2015)."},{"key":"e_1_3_2_1_9_1","volume-title":"BCRLSP: An Offline Reinforcement Learning Framework for Sequential Targeted Promotion. arXiv preprint arXiv:2207.07790","author":"Chen Fanglin","year":"2022","unstructured":"Fanglin Chen, Xiao Liu, Bo Tang, Feiyu Xiong, Serim Hwang, and Guomian Zhuang. 2022. BCRLSP: An Offline Reinforcement Learning Framework for Sequential Targeted Promotion. arXiv preprint arXiv:2207.07790 (2022)."},{"key":"e_1_3_2_1_10_1","volume-title":"A primal-dual approach to constrained markov decision processes. arXiv preprint arXiv:2101.10895","author":"Chen Yi","year":"2021","unstructured":"Yi Chen, Jing Dong, and Zhaoran Wang. 2021. A primal-dual approach to constrained markov decision processes. arXiv preprint arXiv:2101.10895 (2021)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462913"},{"key":"e_1_3_2_1_13_1","volume-title":"Training with quantization noise for extreme model compression. arXiv preprint arXiv:2004.07320","author":"Fan Angela","year":"2020","unstructured":"Angela Fan, Pierre Stock, Benjamin Graham, Edouard Grave, R\u00e9mi Gribonval, Herve Jegou, and Armand Joulin. 2020. Training with quantization noise for extreme model compression. arXiv preprint arXiv:2004.07320 (2020)."},{"key":"e_1_3_2_1_14_1","volume-title":"Deep session interest network for click-through rate prediction. arXiv preprint arXiv:1905.06482","author":"Feng Yufei","year":"2019","unstructured":"Yufei Feng, Fuyu Lv, Weichen Shen, Menghan Wang, Fei Sun, Yu Zhu, and Keping Yang. 2019. Deep session interest network for click-through rate prediction. arXiv preprint arXiv:1905.06482 (2019)."},{"key":"e_1_3_2_1_15_1","volume-title":"D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219","author":"Fu Justin","year":"2020","unstructured":"Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, and Sergey Levine. 2020. D4rl: Datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)."},{"key":"e_1_3_2_1_16_1","volume-title":"Benchmarking batch deep reinforcement learning algorithms. arXiv preprint arXiv:1910.01708","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, Edoardo Conti, Mohammad Ghavamzadeh, and Joelle Pineau. 2019. Benchmarking batch deep reinforcement learning algorithms. arXiv preprint arXiv:1910.01708 (2019)."},{"key":"e_1_3_2_1_17_1","volume-title":"International conference on machine learning. PMLR","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In International conference on machine learning. PMLR, 2052\u20132062."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2019.00122"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467199"},{"key":"e_1_3_2_1_20_1","volume-title":"Abdul Monem\u00a0S Rahma, and Hala Bahjat\u00a0Abdul Wahab","author":"Tawfiq\u00a0Abdul Hussien Farah","year":"2021","unstructured":"Farah Tawfiq\u00a0Abdul Hussien, Abdul Monem\u00a0S Rahma, and Hala Bahjat\u00a0Abdul Wahab. 2021. Recommendation systems for e-commerce systems an overview. In Journal of Physics: Conference Series, Vol.\u00a01897. IOP Publishing, 012024."},{"key":"e_1_3_2_1_21_1","volume-title":"DCAF: A Dynamic computation resource allocation Framework for Online Serving System. arXiv preprint arXiv:2006.09684","author":"Jiang Biye","year":"2020","unstructured":"Biye Jiang, Pengye Zhang, Rihan Chen, Xinchen Luo, Yin Yang, Guan Wang, Guorui Zhou, Xiaoqiang Zhu, and Kun Gai. 2020. DCAF: A Dynamic computation resource allocation Framework for Online Serving System. arXiv preprint arXiv:2006.09684 (2020)."},{"key":"e_1_3_2_1_22_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems 33 (2020), 1179\u20131191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512109"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098011"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5932"},{"key":"e_1_3_2_1_26_1","unstructured":"Nicolas Meuleau Milos Hauskrecht Kee-Eung Kim Leonid Peshkin Leslie\u00a0Pack Kaelbling Thomas\u00a0L Dean and Craig Boutilier. 1998. Solving very large weakly coupled Markov decision processes. In AAAI\/IAAI. 165\u2013172."},{"key":"e_1_3_2_1_27_1","volume-title":"Human-level control through deep reinforcement learning. nature 518, 7540","author":"Mnih Volodymyr","year":"2015","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Andrei\u00a0A Rusu, Joel Veness, Marc\u00a0G Bellemare, Alex Graves, Martin Riedmiller, Andreas\u00a0K Fidjeland, Georg Ostrovski, 2015. Human-level control through deep reinforcement learning. nature 518, 7540 (2015), 529\u2013533."},{"key":"e_1_3_2_1_28_1","volume-title":"Model compression via distillation and quantization. arXiv preprint arXiv:1802.05668","author":"Polino Antonio","year":"2018","unstructured":"Antonio Polino, Razvan Pascanu, and Dan Alistarh. 2018. Model compression via distillation and quantization. arXiv preprint arXiv:1802.05668 (2018)."},{"volume-title":"Monte-Carlo simulation, and machine learning. Vol.\u00a0133","author":"Rubinstein Y","key":"e_1_3_2_1_29_1","unstructured":"Reuven\u00a0Y Rubinstein and Dirk\u00a0P Kroese. 2004. The cross-entropy method: a unified approach to combinatorial optimization, Monte-Carlo simulation, and machine learning. Vol.\u00a0133. Springer."},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems. 1359\u20131367","author":"Tang Pingzhong","year":"2020","unstructured":"Pingzhong Tang, Xun Wang, Zihe Wang, Yadong Xu, and Xiwang Yang. 2020. Optimized Cost per Mille in Feeds Advertising. In Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems. 1359\u20131367."},{"key":"e_1_3_2_1_31_1","volume-title":"Reward constrained policy optimization. arXiv preprint arXiv:1805.11074","author":"Tessler Chen","year":"2018","unstructured":"Chen Tessler, Daniel\u00a0J Mankowitz, and Shie Mannor. 2018. Reward constrained policy optimization. arXiv preprint arXiv:1805.11074 (2018)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"e_1_3_2_1_33_1","volume-title":"International conference on machine learning. PMLR","author":"Wang Ziyu","year":"2016","unstructured":"Ziyu Wang, Tom Schaul, Matteo Hessel, Hado Hasselt, Marc Lanctot, and Nando Freitas. 2016. Dueling network architectures for deep reinforcement learning. In International conference on machine learning. PMLR, 1995\u20132003."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.5555\/2627435.2638566"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271748"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16580"},{"key":"e_1_3_2_1_37_1","volume-title":"MoTiAC: Multi-objective actor-critics for real-time bidding. arXiv preprint arXiv:2002.07408","author":"Yang Chaoqi","year":"2020","unstructured":"Chaoqi Yang, Junwei Lu, Xiaofeng Gao, Haishan Liu, Qiong Chen, Gongshen Liu, and Guihai Chen. 2020. MoTiAC: Multi-objective actor-critics for real-time bidding. arXiv preprint arXiv:2002.07408 (2020)."},{"key":"e_1_3_2_1_38_1","volume-title":"Computation Resource Allocation Solution in Recommender Systems. arXiv preprint arXiv:2103.02259","author":"Yang Xun","year":"2021","unstructured":"Xun Yang, Yunli Wang, Cheng Chen, Qing Tan, Chuan Yu, Jian Xu, and Xiaoqiang Zhu. 2021. Computation Resource Allocation Solution in Recommender Systems. arXiv preprint arXiv:2103.02259 (2021)."},{"key":"e_1_3_2_1_39_1","volume-title":"Combo: Conservative offline model-based policy optimization. Advances in neural information processing systems 34","author":"Yu Tianhe","year":"2021","unstructured":"Tianhe Yu, Aviral Kumar, Rafael Rafailov, Aravind Rajeswaran, Sergey Levine, and Chelsea Finn. 2021. Combo: Conservative offline model-based policy optimization. Advances in neural information processing systems 34 (2021), 28954\u201328967."},{"key":"e_1_3_2_1_40_1","first-page":"20410","article-title":"BCORLE (\u03bb): An Offline Reinforcement Learning and Evaluation Framework for Coupons Allocation in E-commerce Market","volume":"34","author":"Zhang Yang","year":"2021","unstructured":"Yang Zhang, Bo Tang, Qingyu Yang, Dou An, Hongyin Tang, Chenyang Xi, Xueying Li, and Feiyu Xiong. 2021. BCORLE (\u03bb): An Offline Reinforcement Learning and Evaluation Framework for Coupons Allocation in E-commerce Market. Advances in Neural Information Processing Systems 34 (2021), 20410\u201320422.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16156"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403384"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401174"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Austin TX USA","acronym":"WWW '23"},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583313","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583313","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:22Z","timestamp":1750178242000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583313"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":44,"alternative-id":["10.1145\/3543507.3583313","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583313","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}