{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:08:53Z","timestamp":1755907733652,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T00:00:00Z","timestamp":1700956800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,26]]},"DOI":"10.1145\/3624918.3625311","type":"proceedings-article","created":{"date-parts":[[2023,11,23]],"date-time":"2023-11-23T08:49:17Z","timestamp":1700729357000},"page":"282-287","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Reinforcement Re-ranking with 2D Grid-based Recommendation Panels"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7740-508X","authenticated-orcid":false,"given":"Sirui","family":"Chen","sequence":"first","affiliation":[{"name":"School of Information, Renmin University of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7397-5632","authenticated-orcid":false,"given":"Xiao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Gaoling School of AI (GSAI), Renmin University of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0144-1775","authenticated-orcid":false,"given":"Xu","family":"Chen","sequence":"additional","affiliation":[{"name":"Gaoling School of AI (GSAI), Renmin University of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8187-4177","authenticated-orcid":false,"given":"Zhiyu","family":"Li","sequence":"additional","affiliation":[{"name":"Alibaba Group, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3630-9179","authenticated-orcid":false,"given":"Yuan","family":"Wang","sequence":"additional","affiliation":[{"name":"Alibaba Group, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7149-5525","authenticated-orcid":false,"given":"Quan","family":"Lin","sequence":"additional","affiliation":[{"name":"Alibaba Group, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7170-111X","authenticated-orcid":false,"given":"Jun","family":"Xu","sequence":"additional","affiliation":[{"name":"Gaoling School of AI (GSAI), Renmin University of China, China"}]}],"member":"320","published-online":{"date-parts":[[2023,11,26]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Xiyang Luo, Alan Mackey, and Ofer Meshi.","author":"Bello Irwan","year":"2018","unstructured":"Irwan Bello, Sayali Kulkarni, Sagar Jain, Craig Boutilier, Ed Chi, Elad Eban, Xiyang Luo, Alan Mackey, and Ofer Meshi. 2018. Seq2slate: Re-ranking and slate optimization with RNNs. arXiv preprint arXiv:1810.02019 (2018)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11452"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599796"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441824"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403336"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106706"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080805"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1743666.1743736"},{"key":"e_1_3_2_1_9_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap P","year":"2015","unstructured":"Timothy\u00a0P Lillicrap, Jonathan\u00a0J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599364"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3347000"},{"key":"e_1_3_2_1_12_1","volume-title":"High-dimensional continuous control using generalized advantage estimation. arXiv preprint arXiv:1506.02438","author":"Schulman John","year":"2015","unstructured":"John Schulman, Philipp Moritz, Sergey Levine, Michael Jordan, and Pieter Abbeel. 2015. High-dimensional continuous control using generalized advantage estimation. arXiv preprint arXiv:1506.02438 (2015)."},{"key":"e_1_3_2_1_13_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/1046920.1088715"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2792838.2800170"},{"key":"e_1_3_2_1_16_1","volume-title":"Sequential evaluation and generation framework for combinatorial recommender system. arXiv preprint arXiv:1902.00245","author":"Wang Fan","year":"2019","unstructured":"Fan Wang, Xiaomin Fang, Lihang Liu, Yaxue Chen, Jiucheng Tao, Zhiming Peng, Cihang Jin, and Hao Tian. 2019. Sequential evaluation and generation framework for combinatorial recommender system. arXiv preprint arXiv:1902.00245 (2019)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080799"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313514"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401148"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Wanqi Xue Qingpeng Cai Zhenghai Xue Shuo Sun Shuchang Liu Dong Zheng Peng Jiang Kun Gai and Bo An. 2023. PrefRec: Recommender Systems with Human Preferences for Reinforcing Long-term User Engagement. (2023).","DOI":"10.1145\/3580305.3599473"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_22_1","volume-title":"Globally optimized mutual influence aware ranking in e-commerce search. arXiv preprint arXiv:1805.08524","author":"Zhuang Tao","year":"2018","unstructured":"Tao Zhuang, Wenwu Ou, and Zhirong Wang. 2018. Globally optimized mutual influence aware ranking in e-commerce search. arXiv preprint arXiv:1805.08524 (2018)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371801"}],"event":{"name":"SIGIR-AP '23: Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Beijing China","acronym":"SIGIR-AP '23"},"container-title":["Proceedings of the Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624918.3625311","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624918.3625311","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T21:32:46Z","timestamp":1755898366000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624918.3625311"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,26]]},"references-count":24,"alternative-id":["10.1145\/3624918.3625311","10.1145\/3624918"],"URL":"https:\/\/doi.org\/10.1145\/3624918.3625311","relation":{},"subject":[],"published":{"date-parts":[[2023,11,26]]},"assertion":[{"value":"2023-11-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}