{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T05:20:37Z","timestamp":1780636837167,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Meituan"},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62173325"],"award-info":[{"award-number":["No.62173325"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589335.3648310","type":"proceedings-article","created":{"date-parts":[[2024,5,12]],"date-time":"2024-05-12T18:41:21Z","timestamp":1715539281000},"page":"131-140","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["User Response Modeling in Reinforcement Learning for Ads Allocation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2673-8169","authenticated-orcid":false,"given":"Zhiyuan","family":"Zhang","sequence":"first","affiliation":[{"name":"University of Chinese Academy of Sciences &amp; Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9747-391X","authenticated-orcid":false,"given":"Qichao","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences &amp; Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8890-452X","authenticated-orcid":false,"given":"Xiaoxu","family":"Wu","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2005-791X","authenticated-orcid":false,"given":"Xiaowen","family":"Shi","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6530-7102","authenticated-orcid":false,"given":"Guogang","family":"Liao","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5214-337X","authenticated-orcid":false,"given":"Yongkang","family":"Wang","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5512-6442","authenticated-orcid":false,"given":"Xingxing","family":"Wang","sequence":"additional","affiliation":[{"name":"Meituan, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8218-9633","authenticated-orcid":false,"given":"Dongbin","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences &amp; Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"International conference on machine learning. PMLR, 941--950","author":"Chandak Yash","year":"2019","unstructured":"Yash Chandak, Georgios Theocharous, James Kostas, Scott Jordan, and Philip Thomas. 2019. Learning action representations for reinforcement learning. In International conference on machine learning. PMLR, 941--950."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441764"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546758"},{"key":"e_1_3_2_2_5_1","volume-title":"Boosting continuous control with consistency policy. arXiv preprint arXiv:2310.06343","author":"Chen Yuhui","year":"2023","unstructured":"Yuhui Chen, Haoran Li, and Dongbin Zhao. 2023. Boosting continuous control with consistency policy. arXiv preprint arXiv:2310.06343 (2023)."},{"key":"e_1_3_2_2_6_1","volume-title":"Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio.","author":"Cho Kyunghyun","year":"2014","unstructured":"Kyunghyun Cho, Bart Van Merri\u00ebnboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio. 2014. Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC55140.2022.9922100"},{"key":"e_1_3_2_2_8_1","volume-title":"An empirical analysis of search engine advertising: Sponsored search in electronic markets. Management science","author":"Ghose Anindya","year":"2009","unstructured":"Anindya Ghose and Sha Yang. 2009. An empirical analysis of search engine advertising: Sponsored search in electronic markets. Management science, Vol. 55, 10 (2009), 1605--1622."},{"key":"e_1_3_2_2_9_1","volume-title":"Dream to control: Learning behaviors by latent imagination. arXiv preprint arXiv:1912.01603","author":"Hafner Danijar","year":"2019","unstructured":"Danijar Hafner, Timothy Lillicrap, Jimmy Ba, and Mohammad Norouzi. 2019. Dream to control: Learning behaviors by latent imagination. arXiv preprint arXiv:1912.01603 (2019)."},{"key":"e_1_3_2_2_10_1","volume-title":"Mastering atari with discrete world models. arXiv preprint arXiv:2010.02193","author":"Hafner Danijar","year":"2020","unstructured":"Danijar Hafner, Timothy Lillicrap, Mohammad Norouzi, and Jimmy Ba. 2020. Mastering atari with discrete world models. arXiv preprint arXiv:2010.02193 (2020)."},{"key":"e_1_3_2_2_11_1","volume-title":"Mastering Diverse Domains through World Models. arXiv preprint arXiv:2301.04104","author":"Hafner Danijar","year":"2023","unstructured":"Danijar Hafner, Jurgis Pasukonis, Jimmy Ba, and Timothy Lillicrap. 2023. Mastering Diverse Domains through World Models. arXiv preprint arXiv:2301.04104 (2023)."},{"key":"e_1_3_2_2_12_1","volume-title":"International conference on machine learning. PMLR","author":"He He","year":"2016","unstructured":"He He, Jordan Boyd-Graber, Kevin Kwok, and Hal Daum\u00e9 III. 2016. Opponent modeling in deep reinforcement learning. In International conference on machine learning. PMLR, 1804--1813."},{"key":"e_1_3_2_2_13_1","volume-title":"Understanding multi-step deep reinforcement learning: A systematic study of the DQN target. arXiv preprint arXiv:1901.07510","author":"Hernandez-Garcia J Fernando","year":"2019","unstructured":"J Fernando Hernandez-Garcia and Richard S Sutton. 2019. Understanding multi-step deep reinforcement learning: A systematic study of the DQN target. arXiv preprint arXiv:1901.07510 (2019)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/360"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_2_16_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2944789.2944791"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3411952"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3411952"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531847"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512109"},{"key":"e_1_3_2_2_22_1","volume-title":"Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027","author":"Liu Feng","year":"2018","unstructured":"Feng Liu, Ruiming Tang, Xutao Li, Weinan Zhang, Yunming Ye, Haokun Chen, Huifeng Guo, and Yuzhou Zhang. 2018. Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027 (2018)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583244"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9413027"},{"key":"e_1_3_2_2_25_1","volume-title":"International conference on machine learning. PMLR","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adria Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PMLR, 1928--1937."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Wentao Ouyang Xiuwu Zhang Lei Zhao Jinmei Luo Yu Zhang Heng Zou Zhaojie Liu and Yanlong Du. 2020. MiNet: Mixed Interest Network for Cross-Domain Click-Through Rate Prediction.","DOI":"10.1145\/3340531.3412728"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ECCE57851.2023.10101661"},{"key":"e_1_3_2_2_28_1","volume-title":"Retentive network: A successor to transformer for large language models. arXiv preprint arXiv:2307.08621","author":"Sun Yutao","year":"2023","unstructured":"Yutao Sun, Li Dong, Shaohan Huang, Shuming Ma, Yuqing Xia, Jilong Xue, Jianyong Wang, and Furu Wei. 2023. Retentive network: A successor to transformer for large language models. arXiv preprint arXiv:2307.08621 (2023)."},{"key":"e_1_3_2_2_29_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S","unstructured":"Richard S Sutton and Andrew G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3215788"},{"key":"e_1_3_2_2_31_1","unstructured":"Kai Wang Zhene Zou Yue Shang Qilin Deng Minghao Zhao Yile Liang Runze Wu Jianrong Tao Xudong Shen Tangjie Lyu et al. [n. d.]. RL4RS: A Real-World Benchmark for Reinforcement Learning based Recommender System."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357806"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557094"},{"key":"e_1_3_2_2_34_1","volume-title":"Machine learning","author":"Watkins Christopher JCH","year":"1992","unstructured":"Christopher JCH Watkins and Peter Dayan. 1992. Q-learning. Machine learning , Vol. 8 (1992), 279--292."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i5.16580"},{"key":"e_1_3_2_2_36_1","volume-title":"International Conference on Learning Representations.","author":"Xue Wanqi","year":"2023","unstructured":"Wanqi Xue, Qingpeng Cai, Ruohan Zhan, Dong Zheng, Peng Jiang, Kun Gai, and Bo An. 2023. ResAct: Reinforcing long-term engagement in sequential recommendation with residual actor. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403391"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"crossref","unstructured":"Mengchen Zhao Zhao Li Bo An Haifeng Lu Yifan Yang and Chen Chu. 2018a. Impression Allocation for Combating Fraud in E-commerce Via Deep Reinforcement Learning with Action Norm Penalty.. In IJCAI. 3940--3946.","DOI":"10.24963\/ijcai.2018\/548"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16156"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_2_41_1","volume-title":"Deep Reinforcement Learning for List-wise Recommendations. In 1st Workshop on Deep Reinforcement Learning for Knowledge Discovery (DRL4KDD","author":"Zhao Xiangyu","year":"2019","unstructured":"Xiangyu Zhao, Liang Zhang, Long Xia, Zhuoye Ding, Dawei Yin, and Jiliang Tang. 2019. Deep Reinforcement Learning for List-wise Recommendations. In 1st Workshop on Deep Reinforcement Learning for Knowledge Discovery (DRL4KDD 2019)."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403384"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"}],"event":{"name":"WWW '24: The ACM Web Conference 2024","location":"Singapore Singapore","acronym":"WWW '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Companion Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589335.3648310","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589335.3648310","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:39:38Z","timestamp":1755823178000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589335.3648310"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":43,"alternative-id":["10.1145\/3589335.3648310","10.1145\/3589335"],"URL":"https:\/\/doi.org\/10.1145\/3589335.3648310","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}