{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:16:49Z","timestamp":1750220209119,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":13,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,9,13]],"date-time":"2022-09-13T00:00:00Z","timestamp":1663027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,9,18]]},"DOI":"10.1145\/3523227.3547370","type":"proceedings-article","created":{"date-parts":[[2022,9,13]],"date-time":"2022-09-13T14:13:46Z","timestamp":1663078426000},"page":"700-701","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Hands-on Reinforcement Learning for Recommender Systems - From Bandits to SlateQ to Offline RL with Ray RLlib"],"prefix":"10.1145","author":[{"given":"Christy D.","family":"Bergman","sequence":"first","affiliation":[{"name":"Anyscale, United States"}]},{"given":"Kourosh","family":"Hakhamaneshi","sequence":"additional","affiliation":[{"name":"Anyscale Inc., United States"}]}],"member":"320","published-online":{"date-parts":[[2022,9,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_2_1","volume-title":"Hierarchical few-shot imitation with skill transition models. arXiv preprint arXiv:2107.08981 abs\/2107.08981, 1","author":"Hakhamaneshi Kourosh","year":"2021","unstructured":"Kourosh Hakhamaneshi, Ruihan Zhao, Albert Zhan, Pieter Abbeel, and Michael Laskin. 2021. Hierarchical few-shot imitation with skill transition models. arXiv preprint arXiv:2107.08981 abs\/2107.08981, 1 (2021), 1\u201319."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412233"},{"key":"e_1_3_2_1_4_1","volume-title":"Recsim: A configurable simulation platform for recommender systems. arXiv preprint arXiv:1909.04847 abs\/1909.04847","author":"Ie Eugene","year":"2019","unstructured":"Eugene Ie, Chih-wei Hsu, Martin Mladenov, Vihan Jain, Sanmit Narvekar, Jing Wang, Rui Wu, and Craig Boutilier. 2019. Recsim: A configurable simulation platform for recommender systems. arXiv preprint arXiv:1909.04847 abs\/1909.04847 (2019), 1\u201323."},{"key":"e_1_3_2_1_5_1","volume-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 abs\/2005.01643","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 abs\/2005.01643 (2020), 1\u201343."},{"key":"e_1_3_2_1_6_1","volume-title":"Distributed Reinforcement Learning is a Dataflow Problem. arXiv preprint arXiv:2011.12719 34","author":"Liang Eric","year":"2020","unstructured":"Eric Liang, Zhanghao Wu, Michael Luo, Sven Mika, and Ion Stoica. 2020. Distributed Reinforcement Learning is a Dataflow Problem. arXiv preprint arXiv:2011.12719 34 (2020), 5506\u20135517."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3474600"},{"volume-title":"Retrieved","year":"2022","key":"e_1_3_2_1_8_1","unstructured":"Ray. 2022. Ray provides a simple, universal API for building distributed applications. ray.io. Retrieved July 12, 2022 from https:\/\/github.com\/ray-project\/ray"},{"volume-title":"Retrieved","year":"2022","key":"e_1_3_2_1_9_1","unstructured":"RLlib. 2022. RLlib: Industry-Grade Reinforcement Learning. ray.io. Retrieved July 12, 2022 from https:\/\/github.com\/ray-project\/ray\/tree\/master\/rllib"},{"key":"e_1_3_2_1_10_1","first-page":"65","article-title":"RLgraph: Modular Computation Graphs for Deep Reinforcement Learning","volume":"1","author":"Schaarschmidt Michael","year":"2019","unstructured":"Michael Schaarschmidt, Sven Mika, Kai Fricke, and Eiko Yoneki. 2019. RLgraph: Modular Computation Graphs for Deep Reinforcement Learning. Proceedings of Machine Learning and Systems 1 (2019), 65\u201380.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_11_1","volume-title":"Retrieved","author":"Studios Wildlife","year":"2021","unstructured":"Wildlife Studios. 2021. Using Reinforcement Learning to Optimize IAP Offer Recommendations in Mobile Games. wildlifestudios.com. Retrieved July 12, 2022 from https:\/\/www.youtube.com\/watch?v=cGQk8rIoc1Y"},{"key":"e_1_3_2_1_12_1","first-page":"6288","article-title":"Exponentially weighted imitation learning for batched historical data","volume":"31","author":"Wang Qing","year":"2018","unstructured":"Qing Wang, Jiechao Xiong, Lei Han, Han Liu, Tong Zhang, 2018. Exponentially weighted imitation learning for batched historical data. Advances in Neural Information Processing Systems 31 (2018), 6288\u20136297.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_13_1","unstructured":"Ziyu Wang Alexander Novikov Konrad Zolna Jost\u00a0Tobias Springenberg Scott\u00a0E. Reed Bobak Shahriari Noah\u00a0Y. Siegel Josh Merel \u00c7aglar G\u00fcl\u00e7ehre Nicolas Heess and Nando de Freitas. 2020. Critic Regularized Regression. CoRR abs\/2006.15134(2020) 1\u201324. arXiv:2006.15134https:\/\/arxiv.org\/abs\/2006.15134"}],"event":{"name":"RecSys '22: Sixteenth ACM Conference on Recommender Systems","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Seattle WA USA","acronym":"RecSys '22"},"container-title":["Proceedings of the 16th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3523227.3547370","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3523227.3547370","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:03:00Z","timestamp":1750186980000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3523227.3547370"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,13]]},"references-count":13,"alternative-id":["10.1145\/3523227.3547370","10.1145\/3523227"],"URL":"https:\/\/doi.org\/10.1145\/3523227.3547370","relation":{},"subject":[],"published":{"date-parts":[[2022,9,13]]},"assertion":[{"value":"2022-09-13","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}