{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T12:16:07Z","timestamp":1767183367796,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,7,18]],"date-time":"2023-07-18T00:00:00Z","timestamp":1689638400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,7,19]]},"DOI":"10.1145\/3539618.3591899","type":"proceedings-article","created":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T00:22:23Z","timestamp":1689726143000},"page":"2935-2944","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":13,"title":["RL4RS: A Real-World Dataset for Reinforcement Learning based Recommender System"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7767-2329","authenticated-orcid":false,"given":"Kai","family":"Wang","sequence":"first","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0142-1010","authenticated-orcid":false,"given":"Zhene","family":"Zou","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2871-0023","authenticated-orcid":false,"given":"Minghao","family":"Zhao","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0506-5511","authenticated-orcid":false,"given":"Qilin","family":"Deng","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9842-9937","authenticated-orcid":false,"given":"Yue","family":"Shang","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7405-6757","authenticated-orcid":false,"given":"Yile","family":"Liang","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6986-5825","authenticated-orcid":false,"given":"Runze","family":"Wu","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0447-2614","authenticated-orcid":false,"given":"Xudong","family":"Shen","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9858-809X","authenticated-orcid":false,"given":"Tangjie","family":"Lyu","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5420-0516","authenticated-orcid":false,"given":"Changjie","family":"Fan","sequence":"additional","affiliation":[{"name":"NetEase Fuxi AI Lab, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2023,7,18]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"NeurIPS","author":"Bai X.","year":"2019","unstructured":"X. Bai, J. Guan, and H. Wang. Model-based reinforcement learning with adversarial training for online recommendation. In NeurIPS, 2019."},{"key":"e_1_3_2_1_2_1","volume-title":"Decision transformer: Reinforcement learning via sequence modeling. arXiv preprint arXiv:2106.01345","author":"Chen L.","year":"2021","unstructured":"L. Chen, L. Kevin, R. Aravind, L. Kimin, G. Aditya, L. Michael, A. Pieter, S. Aravind, and M. Igor. Decision transformer: Reinforcement learning via sequence modeling. arXiv preprint arXiv:2106.01345, 2021."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_4_1","first-page":"1052","volume-title":"International Conference on Machine Learning","author":"Chen X.","year":"2019","unstructured":"X. Chen, S. Li, H. Li, S. Jiang, Y. Qi, and L. Song. Generative adversarial user model for reinforcement learning based recommendation system. In International Conference on Machine Learning, pages 1052--1061. PMLR, 2019."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_6_1","volume-title":"Doubly robust policy evaluation and learning. arXiv preprint arXiv:1103.4601","author":"Dud\u00edk M.","year":"2011","unstructured":"M. Dud\u00edk, J. Langford, and L. Li. Doubly robust policy evaluation and learning. arXiv preprint arXiv:1103.4601, 2011."},{"key":"e_1_3_2_1_7_1","volume-title":"Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679","author":"Dulac-Arnold G.","year":"2015","unstructured":"G. Dulac-Arnold, R. Evans, H. van Hasselt, P. Sunehag, T. Lillicrap, J. Hunt, T. Mann, T. Weber, T. Degris, and B. Coppin. Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679, 2015."},{"key":"e_1_3_2_1_8_1","volume-title":"D4rl: Datasets for deep data-driven reinforcement learning. arXiv","author":"Fu J.","year":"2020","unstructured":"J. Fu, A. Kumar, O. Nachum, G. Tucker, and S. Levine. D4rl: Datasets for deep data-driven reinforcement learning. arXiv, 2020."},{"key":"e_1_3_2_1_9_1","first-page":"1587","volume-title":"International conference on machine learning","author":"Fujimoto S.","year":"2018","unstructured":"S. Fujimoto, H. Hoof, and D. Meger. Addressing function approximation error in actor-critic methods. In International conference on machine learning, pages 1587--1596. PMLR, 2018."},{"key":"e_1_3_2_1_10_1","first-page":"2052","volume-title":"International Conference on Machine Learning","author":"Fujimoto S.","year":"2019","unstructured":"S. Fujimoto, D. Meger, and D. Precup. Off-policy deep reinforcement learning without exploration. In International Conference on Machine Learning, pages 2052--2062, 2019."},{"key":"e_1_3_2_1_11_1","volume-title":"Horizon: Facebook's open source applied reinforcement learning platform. ArXiv, abs\/1811.00260","author":"Gauci J.","year":"2018","unstructured":"J. Gauci, E. Conti, Y. Liang, K. Virochsiri, Y. R. He, Z. Kaden, V. Narayanan, X. Ye, and S. Fujimoto. Horizon: Facebook's open source applied reinforcement learning platform. ArXiv, abs\/1811.00260, 2018."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330832"},{"key":"e_1_3_2_1_13_1","volume-title":"Rl unplugged: Benchmarks for offline reinforcement learning. arXiv","author":"Gulcehre C.","year":"2020","unstructured":"C. Gulcehre, Z. Wang, A. Novikov, T. L. Paine, and N. D. Freitas. Rl unplugged: Benchmarks for offline reinforcement learning. arXiv, 2020."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"e_1_3_2_1_15_1","volume-title":"Session-based recommenda- tions with recurrent neural networks. Computer ence","author":"Hidasi B.","year":"2015","unstructured":"B. Hidasi, A. Karatzoglou, L. Baltrunas, and D. Tikk. Session-based recommenda- tions with recurrent neural networks. Computer ence, 2015."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1952.10483446"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219846"},{"key":"e_1_3_2_1_18_1","volume-title":"Aliexpress learning-to-rank: Maximizing online model performance without going online","author":"Huzhang G.","year":"2021","unstructured":"G. Huzhang, Z. Pang, Y. Gao, Y. Liu, W. Shen, W.-J. Zhou, Q. Da, A. Zeng, H. Yu, Y. Yu, et al. Aliexpress learning-to-rank: Maximizing online model performance without going online. IEEE Transactions on Knowledge and Data Engineering, 2021."},{"key":"e_1_3_2_1_19_1","volume-title":"Recsim: A configurable simulation platform for recommender systems. arXiv preprint arXiv:1909.04847","author":"Ie E.","year":"2019","unstructured":"E. Ie, C.-w. Hsu, M. Mladenov, V. Jain, S. Narvekar, J. Wang, R. Wu, and C. Boutilier. Recsim: A configurable simulation platform for recommender systems. arXiv preprint arXiv:1909.04847, 2019."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/360"},{"key":"e_1_3_2_1_21_1","volume-title":"Reinforcement learning as one big sequence modeling problem. arXiv preprint arXiv:2106.02039","author":"Janner M.","year":"2021","unstructured":"M. Janner, Q. Li, and S. Levine. Reinforcement learning as one big sequence modeling problem. arXiv preprint arXiv:2106.02039, 2021."},{"key":"e_1_3_2_1_22_1","first-page":"652","volume-title":"International Conference on Machine Learning","author":"Jiang N.","year":"2016","unstructured":"N. Jiang and L. Li. Doubly robust off-policy value evaluation for reinforcement learning. In International Conference on Machine Learning, pages 652--661. PMLR, 2016."},{"key":"e_1_3_2_1_23_1","volume-title":"ArXiv","author":"Kumar A.","year":"2020","unstructured":"A. Kumar, A. Zhou, G. Tucker, and S. Levine. Conservative q-learning for offline reinforcement learning. ArXiv, 2020."},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning (ICML)","author":"Liang E.","year":"2018","unstructured":"E. Liang, R. Liaw, R. Nishihara, P. Moritz, R. Fox, K. Goldberg, J. E. Gonzalez, M. I. Jordan, and I. Stoica. RLlib: Abstractions for distributed reinforcement learning. In International Conference on Machine Learning (ICML), 2018."},{"key":"e_1_3_2_1_25_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap T. P.","year":"2015","unstructured":"T. P. Lillicrap, J. J. Hunt, A. Pritzel, N. Heess, T. Erez, Y. Tassa, D. Silver, and D. Wierstra. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971, 2015."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474259"},{"key":"e_1_3_2_1_27_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih V.","year":"2013","unstructured":"V. Mnih, K. Kavukcuoglu, D. Silver, A. Graves, I. Antonoglou, D. Wierstra, and M. Riedmiller. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602, 2013."},{"key":"e_1_3_2_1_28_1","volume-title":"ICML","author":"Mnih V.","year":"2016","unstructured":"V. Mnih, A. P. Badia, M. Mirza, A. Graves, T. P. Lillicrap, T. Harley, D. Silver, and K. Kavukcuoglu. Asynchronous methods for deep reinforcement learning. In ICML, 2016."},{"key":"e_1_3_2_1_29_1","volume-title":"Improving language understanding by generative pre-training","author":"Radford A.","year":"2018","unstructured":"A. Radford and K. Narasimhan. Improving language understanding by generative pre-training. 2018."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.17487\/rfc1321"},{"key":"e_1_3_2_1_31_1","volume-title":"Recogym: A reinforcement learning environment for the problem of product recommendation in online advertising. ArXiv, abs\/1808.00720","author":"Rohde D.","year":"2018","unstructured":"D. Rohde, S. Bonner, T. Dunlop, F. Vasile, and A. Karatzoglou. Recogym: A reinforcement learning environment for the problem of product recommendation in online advertising. ArXiv, abs\/1808.00720, 2018."},{"key":"e_1_3_2_1_32_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman J.","year":"2017","unstructured":"J. Schulman, F. Wolski, P. Dhariwal, A. Radford, and O. Klimov. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347, 2017."},{"key":"e_1_3_2_1_33_1","volume-title":"An offline deep reinforcement library. https:\/\/github.com\/ takuseno\/d3rlpy","author":"Seno T.","year":"2020","unstructured":"T. Seno. d3rlpy: An offline deep reinforcement library. https:\/\/github.com\/ takuseno\/d3rlpy, 2020."},{"issue":"9","key":"e_1_3_2_1_34_1","volume":"6","author":"Shani G.","year":"2005","unstructured":"G. Shani, D. Heckerman, R. I. Brafman, and C. Boutilier. An mdp-based recommender system. Journal of Machine Learning Research, 6(9), 2005.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"e_1_3_2_1_36_1","volume-title":"NIPS","author":"Sutton R. S.","year":"1999","unstructured":"R. S. Sutton, D. A. McAllester, S. Singh, and Y. Mansour. Policy gradient methods for reinforcement learning with function approximation. In NIPS, 1999."},{"key":"e_1_3_2_1_37_1","volume-title":"Deepmind control suite. arXiv","author":"Tassa Y.","year":"2018","unstructured":"Y. Tassa, Y. Doron, A. Muldal, T. Erez, Y. Li, D. Casas, D. Budden, A. Abdolmaleki, J. Merel, and A. Lefrancq. Deepmind control suite. arXiv, 2018."},{"key":"e_1_3_2_1_38_1","first-page":"2139","volume-title":"International Conference on Machine Learning","author":"Thomas P.","year":"2016","unstructured":"P. Thomas and E. Brunskill. Data-efficient off-policy policy evaluation for re-inforcement learning. In International Conference on Machine Learning, pages 2139--2148. PMLR, 2016."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_40_1","first-page":"14129","volume":"33","author":"Yu T.","year":"2020","unstructured":"T. Yu, G. Thomas, L. Yu, S. Ermon, J. Y. Zou, S. Levine, C. Finn, and T. Ma. Mopo: Model-based offline policy optimization. Advances in Neural Information Processing Systems, 33:14129--14142, 2020.","journal-title":"Mopo: Model-based offline policy optimization. Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_41_1","volume-title":"Combo: Con-servative offline model-based policy optimization. Advances in neural information processing systems, 34: 28954--28967","author":"Yu T.","year":"2021","unstructured":"T. Yu, A. Kumar, R. Rafailov, A. Rajeswaran, S. Levine, and C. Finn. Combo: Con-servative offline model-based policy optimization. Advances in neural information processing systems, 34:28954--28967, 2021."},{"key":"e_1_3_2_1_42_1","volume-title":"Deep reinforcement learning for list-wise recommendations. arXiv preprint arXiv:1801.00209","author":"Zhao X.","year":"2017","unstructured":"X. Zhao, L. Zhang, L. Xia, Z. Ding, D. Yin, and J. Tang. Deep reinforcement learning for list-wise recommendations. arXiv preprint arXiv:1801.00209, 2017."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"}],"event":{"name":"SIGIR '23: The 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Taipei Taiwan","acronym":"SIGIR '23"},"container-title":["Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591899","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539618.3591899","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:38:07Z","timestamp":1750178287000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591899"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,18]]},"references-count":44,"alternative-id":["10.1145\/3539618.3591899","10.1145\/3539618"],"URL":"https:\/\/doi.org\/10.1145\/3539618.3591899","relation":{},"subject":[],"published":{"date-parts":[[2023,7,18]]},"assertion":[{"value":"2023-07-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}