{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:09:52Z","timestamp":1765544992973,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1145\/3696410.3714959","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:52:18Z","timestamp":1745362338000},"page":"3840-3849","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["xMTF: A Formula-Free Model for Reinforcement-Learning-Based Multi-Task Fusion in Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4724-5289","authenticated-orcid":false,"given":"Yang","family":"Cao","sequence":"first","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6112-8474","authenticated-orcid":false,"given":"Changhao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1267-1680","authenticated-orcid":false,"given":"Xiaoshuang","family":"Chen","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1642-7840","authenticated-orcid":false,"given":"Kaiqiao","family":"Zhan","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1329-3876","authenticated-orcid":false,"given":"Ben","family":"Wang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543873.3584640"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583259"},{"key":"e_1_3_2_1_3_1","volume-title":"Value-aware recommendation based on reinforced profit maximization in e-commerce systems. arXiv preprint arXiv:1902.00851","author":"Changhua Pei","year":"2019","unstructured":"Pei Changhua, Yang Xinru, Cui Qing, Lin Xiao, Sun Fei, Jiang Peng, Ou Wenwu, and Zhang Yongfeng. 2019. Value-aware recommendation based on reinforced profit maximization in e-commerce systems. arXiv preprint arXiv:1902.00851 (2019)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_5_1","volume-title":"Cache-Aware Reinforcement Learning in Large-Scale Recommender Systems. In Companion Proceedings of the ACM on Web Conference","author":"Chen Xiaoshuang","year":"2024","unstructured":"Xiaoshuang Chen, Gengrui Zhang, Yao Wang, Yulin Wu, Shuo Su, Kaiqiao Zhan, and Ben Wang. 2024. Cache-Aware Reinforcement Learning in Large-Scale Recommender Systems. In Companion Proceedings of the ACM on Web Conference 2024. 284--291."},{"key":"e_1_3_2_1_6_1","volume-title":"International conference on machine learning. PMLR, 794--803","author":"Chen Zhao","year":"2018","unstructured":"Zhao Chen, Vijay Badrinarayanan, Chen-Yu Lee, and Andrew Rabinovich. 2018. Gradnorm: Gradient normalization for adaptive loss balancing in deep multitask networks. In International conference on machine learning. PMLR, 794--803."},{"key":"e_1_3_2_1_7_1","volume-title":"International conference on machine learning. 
PMLR, 1587--1596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587--1596."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557624"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371827"},{"key":"e_1_3_2_1_10_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861--1870."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/AIAM48774.2019.00011"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688132"},{"volume-title":"Soft Computing as Transdisciplinary Science and Technology: Proceedings of the fourth IEEE International Workshop WSTST'05","author":"K\u00f6ppen Mario","key":"e_1_3_2_1_13_1","unstructured":"Mario K\u00f6ppen and Kaori Yoshida. 2005. Universal representation of image functions by the sprecher construction. In Soft Computing as Transdisciplinary Science and Technology: Proceedings of the fourth IEEE International Workshop WSTST'05. Springer, 202--210."},{"key":"e_1_3_2_1_14_1","volume-title":"Random Search, Genetic Algorithm: A Big Comparison for NAS. arxiv","author":"Liashchynskyi Petro","year":"2019","unstructured":"Petro Liashchynskyi and Pavlo Liashchynskyi. 2019. Grid Search, Random Search, Genetic Algorithm: A Big Comparison for NAS. arxiv: 1912.06059 [cs.LG] https:\/\/arxiv.org\/abs\/1912.06059"},{"key":"e_1_3_2_1_15_1","volume-title":"Continuous control with deep reinforcement learning. 
arXiv preprint arXiv:1509.02971","author":"Lillicrap Timothy P","year":"2015","unstructured":"Timothy P Lillicrap, Jonathan J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_1_16_1","volume-title":"com recommendations: Item-to-item collaborative filtering","author":"Linden Greg","year":"2003","unstructured":"Greg Linden, Brent Smith, and Jeremy York. 2003. Amazon. com recommendations: Item-to-item collaborative filtering. IEEE Internet computing, Vol. 7, 1 (2003), 76--80."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1719970.1719976"},{"key":"e_1_3_2_1_18_1","volume-title":"An Off-Policy Reinforcement Learning Algorithm Customized for Multi-Task Fusion in Large-Scale Recommender Systems. arXiv preprint arXiv:2404.17589","author":"Liu Peng","year":"2024","unstructured":"Peng Liu, Cong Xu, Ming Zhao, Jiawei Zhu, Bin Wang, and Yi Ren. 2024. An Off-Policy Reinforcement Learning Algorithm Customized for Multi-Task Fusion in Large-Scale Recommender Systems. arXiv preprint arXiv:2404.17589 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the IFIP Technical Conference. 400--404","author":"Mockus Jonas","year":"1974","unstructured":"Jonas Mockus. 1974. On Bayesian methods for seeking the extremum. In Proceedings of the IFIP Technical Conference. 400--404."},{"key":"e_1_3_2_1_21_1","volume-title":"BPR: Bayesian personalized ranking from implicit feedback. arXiv preprint arXiv:1205.2618","author":"Rendle Steffen","year":"2012","unstructured":"Steffen Rendle, Christoph Freudenthaler, Zeno Gantner, and Lars Schmidt-Thieme. 2012. BPR: Bayesian personalized ranking from implicit feedback. 
arXiv preprint arXiv:1205.2618 (2012)."},{"volume-title":"Monte-Carlo simulation, and machine learning.","author":"Rubinstein Reuven Y","key":"e_1_3_2_1_22_1","unstructured":"Reuven Y Rubinstein and Dirk P Kroese. 2004. The cross-entropy method: a unified approach to combinatorial optimization, Monte-Carlo simulation, and machine learning. Vol. 133. Springer."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28749"},{"volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","key":"e_1_3_2_1_24_1","unstructured":"Richard S Sutton and Andrew G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412236"},{"key":"e_1_3_2_1_26_1","volume-title":"2017 USENIX Annual Technical Conference (USENIX ATC 17)","author":"Tang Linpeng","year":"2017","unstructured":"Linpeng Tang, Qi Huang, Amit Puntambekar, Ymir Vigfusson, Wyatt Lloyd, and Kai Li. 2017. Popularity prediction of facebook videos for higher quality streaming. In 2017 USENIX Annual Technical Conference (USENIX ATC 17). 111--123."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v12i1.15031"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589335.3648345"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Wanqi Xue Qingpeng Cai Zhenghai Xue Shuo Sun Shuchang Liu Dong Zheng Peng Jiang Kun Gai and Bo An. 2023. PrefRec: Recommender Systems with Human Preferences for Reinforcing Long-term User Engagement. (2023).","DOI":"10.1145\/3580305.3599473"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26275"},{"key":"e_1_3_2_1_31_1","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume":"33","author":"Yu Tianhe","year":"2020","unstructured":"Tianhe Yu, Saurabh Kumar, Abhishek Gupta, Sergey Levine, Karol Hausman, and Chelsea Finn. 2020. 
Gradient surgery for multi-task learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 5824--5836.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539092"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28783"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539040"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i15.29652"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714959","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714959","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:54Z","timestamp":1750295934000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714959"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":37,"alternative-id":["10.1145\/3696410.3714959","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714959","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}