{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,10]],"date-time":"2026-07-10T01:58:56Z","timestamp":1783648736339,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583244","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:51Z","timestamp":1682551851000},"page":"833-844","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":36,"title":["Exploration and Regularization of the Latent Action Space in Recommendation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1440-911X","authenticated-orcid":false,"given":"Shuchang","family":"Liu","sequence":"first","affiliation":[{"name":"Kuaishou Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6451-9299","authenticated-orcid":false,"given":"Qingpeng","family":"Cai","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3625-0477","authenticated-orcid":false,"given":"Bowen","family":"Sun","sequence":"additional","affiliation":[{"name":"Peking University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6051-8659","authenticated-orcid":false,"given":"Yuhao","family":"Wang","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9632-8400","authenticated-orcid":false,"given":"Ji","family":"Jiang","sequence":"additional","affiliation":[{"name":"Peking University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0424-9658","authenticated-orcid":false,"given":"Dong","family":"Zheng","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9266-0780","authenticated-orcid":false,"given":"Peng","family":"Jiang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3636-3618","authenticated-orcid":false,"given":"Kun","family":"Gai","sequence":"additional","affiliation":[{"name":"Unaffiliated, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2926-4416","authenticated-orcid":false,"given":"Xiangyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2633-8555","authenticated-orcid":false,"given":"Yongfeng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Rutgers University, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Reinforcement learning based recommender systems: A survey. ACM Computing Surveys (CSUR)","author":"Afsar M\u00a0Mehdi","year":"2021","unstructured":"M\u00a0Mehdi Afsar, Trafford Crump, and Behrouz Far. 2021. Reinforcement learning based recommender systems: A survey. ACM Computing Surveys (CSUR) (2021)."},{"key":"e_1_3_2_1_2_1","volume-title":"A model-based reinforcement learning with adversarial training for online recommendation. Advances in Neural Information Processing Systems 32","author":"Bai Xueying","year":"2019","unstructured":"Xueying Bai, Jian Guan, and Hongning Wang. 2019. A model-based reinforcement learning with adversarial training for online recommendation. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_3_1","volume-title":"Constrained Reinforcement Learning for Short Video Recommendation. arXiv preprint arXiv:2205.13248","author":"Cai Qingpeng","year":"2022","unstructured":"Qingpeng Cai, Ruohan Zhan, Chi Zhang, Jie Zheng, Guangwei Ding, Pinghua Gong, Dong Zheng, and Peng Jiang. 2022. Constrained Reinforcement Learning for Short Video Recommendation. arXiv preprint arXiv:2205.13248 (2022)."},{"key":"e_1_3_2_1_4_1","volume-title":"International conference on machine learning. PMLR, 941\u2013950","author":"Chandak Yash","year":"2019","unstructured":"Yash Chandak, Georgios Theocharous, James Kostas, Scott Jordan, and Philip Thomas. 2019. Learning action representations for reinforcement learning. In International conference on machine learning. PMLR, 941\u2013950."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462968"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013312"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449973"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441764"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159668"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482347"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_13_1","volume-title":"Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679","author":"Dulac-Arnold Gabriel","year":"2015","unstructured":"Gabriel Dulac-Arnold, Richard Evans, Hado van Hasselt, Peter Sunehag, Timothy Lillicrap, Jonathan Hunt, Timothy Mann, Theophane Weber, Thomas Degris, and Ben Coppin. 2015. Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679 (2015)."},{"key":"e_1_3_2_1_14_1","volume-title":"A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34","author":"Fujimoto Scott","year":"2021","unstructured":"Scott Fujimoto and Shixiang\u00a0Shane Gu. 2021. A minimalist approach to offline reinforcement learning. Advances in neural information processing systems 34 (2021), 20132\u201320145."},{"key":"e_1_3_2_1_15_1","volume-title":"International conference on machine learning. PMLR, 1587\u20131596","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International conference on machine learning. PMLR, 1587\u20131596."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557624"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441824"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498487"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546767"},{"key":"e_1_3_2_1_20_1","volume-title":"The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis) 5 4 (2015) 1\u201319","author":"Harper F\u00a0Maxwell","year":"2015","unstructured":"F\u00a0Maxwell Harper. 2015. The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis) 5 4 (2015) 1\u201319. F Maxwell Harper and Joseph A Konstan. 2015. The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis) 5 4 (2015) 1\u201319."},{"key":"e_1_3_2_1_21_1","unstructured":"Eugene Ie Vihan Jain Jing Wang Sanmit Narvekar Ritesh Agarwal Rui Wu Heng-Tze Cheng Tushar Chandra and Craig Boutilier. 2019. SlateQ: A tractable decomposition for reinforcement learning with recommendation sets. (2019)."},{"key":"e_1_3_2_1_22_1","volume-title":"Martin Mladenov, Vihan Jain, Sanmit Narvekar, Jing Wang, Rui Wu, and Craig Boutilier.","author":"Ie Eugene","year":"2019","unstructured":"Eugene Ie, Chih wei Hsu, Martin Mladenov, Vihan Jain, Sanmit Narvekar, Jing Wang, Rui Wu, and Craig Boutilier. 2019. RecSim: A Configurable Simulation Platform for Recommender Systems. (2019). arxiv:1909.04847\u00a0[cs.LG]"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3109859.3109872"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570464"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_1_27_1","volume-title":"Advances in collaborative filtering. Recommender systems handbook","author":"Koren Yehuda","year":"2022","unstructured":"Yehuda Koren, Steffen Rendle, and Robert Bell. 2022. Advances in collaborative filtering. Recommender systems handbook (2022), 91\u2013142."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531941"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186150"},{"key":"e_1_3_2_1_30_1","unstructured":"Timothy\u00a0P. Lillicrap Jonathan\u00a0J. Hunt Alexander Pritzel Nicolas Heess Tom Erez Yuval Tassa David Silver and Daan Wierstra. 2016. Continuous control with deep reinforcement learning.. In ICLR (Poster). http:\/\/arxiv.org\/abs\/1509.02971"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106170"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000016"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1282100.1282114"},{"key":"e_1_3_2_1_34_1","volume-title":"International conference on machine learning. PMLR","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adria\u00a0Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PMLR, 1928\u20131937."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313404"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772690.1772773"},{"key":"e_1_3_2_1_37_1","volume-title":"An MDP-based recommender system.Journal of Machine Learning Research 6, 9","author":"Shani Guy","year":"2005","unstructured":"Guy Shani, David Heckerman, Ronen\u00a0I Brafman, and Craig Boutilier. 2005. An MDP-based recommender system.Journal of Machine Learning Research 6, 9 (2005)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3411949"},{"key":"e_1_3_2_1_39_1","volume-title":"Veegan: Reducing mode collapse in gans using implicit variational learning. Advances in neural information processing systems 30","author":"Srivastava Akash","year":"2017","unstructured":"Akash Srivastava, Lazar Valkov, Chris Russell, Michael\u00a0U Gutmann, and Charles Sutton. 2017. Veegan: Reducing mode collapse in gans using implicit variational learning. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357895"},{"key":"e_1_3_2_1_41_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton S","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/1297231.1297250"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159656"},{"key":"e_1_3_2_1_44_1","volume-title":"RL4RS: A Real-World Benchmark for Reinforcement Learning based Recommender System. ArXiv abs\/2110.11073","author":"Wang Kai","year":"2021","unstructured":"Kai Wang, Zhene Zou, Yue Shang, Qilin Deng, Minghao Zhao, Runze Wu, Xudong Shen, Tangjie Lyu, and Changjie Fan. 2021. RL4RS: A Real-World Benchmark for Reinforcement Learning based Recommender System. ArXiv abs\/2110.11073 (2021)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3440207"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301346"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3331184.3331203"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401147"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498494"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3346996"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16156"},{"key":"e_1_3_2_1_52_1","volume-title":"Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletterSpring","author":"Zhao Xiangyu","year":"2019","unstructured":"Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin. 2019. \" Deep reinforcement learning for search, recommendation, and online advertising: a survey\" by Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletterSpring (2019), 1\u201315."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412044"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450125"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","location":"Austin TX USA","acronym":"WWW '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583244","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583244","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:09:19Z","timestamp":1750183759000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583244"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":56,"alternative-id":["10.1145\/3543507.3583244","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583244","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}