{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T00:52:09Z","timestamp":1781311929180,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T00:00:00Z","timestamp":1694649600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,9,14]]},"DOI":"10.1145\/3604915.3610641","type":"proceedings-article","created":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T22:40:23Z","timestamp":1694731223000},"page":"1-1","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Integrating Offline Reinforcement Learning with Transformers for Sequential Recommendation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1521-391X","authenticated-orcid":false,"given":"Xumei","family":"Xi","sequence":"first","affiliation":[{"name":"School of Operations Research and Information Engineering, Cornell University, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4620-8532","authenticated-orcid":false,"given":"Yuke","family":"Zhao","sequence":"additional","affiliation":[{"name":"Bloomberg LP, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9770-4033","authenticated-orcid":false,"given":"Quan","family":"Liu","sequence":"additional","affiliation":[{"name":"Bloomberg, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6073-2763","authenticated-orcid":false,"given":"Liwen","family":"Ouyang","sequence":"additional","affiliation":[{"name":"Bloomberg, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9197-7794","authenticated-orcid":false,"given":"Yang","family":"Wu","sequence":"additional","affiliation":[{"name":"Independent Researcher, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,9,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460231.3478864"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2792838.2798723"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICICCT.2018.8473335"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3089941"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441824"},{"key":"e_1_3_2_1_6_1","volume-title":"The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis) 5, 4","author":"Harper F\u00a0Maxwell","year":"2015","unstructured":"F\u00a0Maxwell Harper and Joseph\u00a0A Konstan. 2015. The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis) 5, 4 (2015), 1\u201319."},{"key":"e_1_3_2_1_7_1","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"Hidasi Bal\u00e1zs","unstructured":"Bal\u00e1zs Hidasi and Domonkos Tikk. 2012. Fast ALS-Based Tensor Factorization for Context-Aware Recommendation from Implicit Feedback. In Machine Learning and Knowledge Discovery in Databases. Springer Berlin Heidelberg, Berlin, Heidelberg, 67\u201382."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Yehuda Koren Robert Bell and Chris Volinsky. 2009. MATRIX FACTORIZATION TECHNIQUES FOR RECOMMENDER SYSTEMS.","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359554"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106170"},{"key":"e_1_3_2_1_12_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/336992.337035"},{"key":"e_1_3_2_1_14_1","volume-title":"Mastering atari, go, chess and shogi by planning with a learned model. Nature 588, 7839","author":"Schrittwieser Julian","year":"2020","unstructured":"Julian Schrittwieser, Ioannis Antonoglou, Thomas Hubert, Karen Simonyan, Laurent Sifre, Simon Schmitt, Arthur Guez, Edward Lockhart, Demis Hassabis, Thore Graepel, 2020. Mastering atari, go, chess and shogi by planning with a learned model. Nature 588, 7839 (2020), 604\u2013609."},{"key":"e_1_3_2_1_15_1","volume-title":"CURL: Contrastive Unsupervised Representations for Reinforcement Learning. arxiv:2004.04136\u00a0[cs.LG]","author":"Srinivas Aravind","year":"2020","unstructured":"Aravind Srinivas, Michael Laskin, and Pieter Abbeel. 2020. CURL: Contrastive Unsupervised Representations for Reinforcement Learning. arxiv:2004.04136\u00a0[cs.LG]"},{"key":"e_1_3_2_1_16_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton S","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159656"},{"key":"e_1_3_2_1_18_1","volume-title":"Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141\u00a0ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3496375"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412258"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"}],"event":{"name":"RecSys '23: Seventeenth ACM Conference on Recommender Systems","location":"Singapore Singapore","acronym":"RecSys '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGecom Special Interest Group on Economics and Computation"]},"container-title":["Proceedings of the 17th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3604915.3610641","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3604915.3610641","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:36Z","timestamp":1750178796000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3604915.3610641"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,14]]},"references-count":21,"alternative-id":["10.1145\/3604915.3610641","10.1145\/3604915"],"URL":"https:\/\/doi.org\/10.1145\/3604915.3610641","relation":{},"subject":[],"published":{"date-parts":[[2023,9,14]]},"assertion":[{"value":"2023-09-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}