{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:03:45Z","timestamp":1750309425076,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":12,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T00:00:00Z","timestamp":1728345600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100004318","name":"Microsoft","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004318","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,8]]},"DOI":"10.1145\/3640457.3691698","type":"proceedings-article","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T15:39:28Z","timestamp":1728401968000},"page":"1180-1183","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-Preview Recommendation via Reinforcement Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-6473-8529","authenticated-orcid":false,"given":"Yang","family":"Xu","sequence":"first","affiliation":[{"name":"Department of Statistics, North Carolina State University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3576-0512","authenticated-orcid":false,"given":"Kuan-Ting","family":"Lai","sequence":"additional","affiliation":[{"name":"Feeds & Verticals, Microsoft, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4707-1634","authenticated-orcid":false,"given":"Pengcheng","family":"Xiong","sequence":"additional","affiliation":[{"name":"Microsoft, 
USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1885-0609","authenticated-orcid":false,"given":"Zhong","family":"Wu","sequence":"additional","affiliation":[{"name":"Microsoft, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,10,8]]},"reference":[{"volume-title":"Constrained Markov decision processes","author":"Altman Eitan","unstructured":"Eitan Altman. 2021. Constrained Markov decision processes. Routledge.","key":"e_1_3_2_1_1_1"},{"key":"e_1_3_2_1_2_1","volume-title":"Neural combinatorial optimization with reinforcement learning. arXiv preprint arXiv:1611.09940","author":"Bello Irwan","year":"2016","unstructured":"Irwan Bello, Hieu Pham, Quoc\u00a0V Le, Mohammad Norouzi, and Samy Bengio. 2016. Neural combinatorial optimization with reinforcement learning. arXiv preprint arXiv:1611.09940 (2016)."},{"key":"e_1_3_2_1_3_1","volume-title":"Value constrained model-free continuous control. arXiv preprint arXiv:1902.04623","author":"Bohez Steven","year":"2019","unstructured":"Steven Bohez, Abbas Abdolmaleki, Michael Neunert, Jonas Buchli, Nicolas Heess, and Raia Hadsell. 2019. Value constrained model-free continuous control. arXiv preprint arXiv:1902.04623 (2019)."},{"key":"e_1_3_2_1_4_1","first-page":"609","article-title":"Reinforcement learning with combinatorial actions: An application to vehicle routing","volume":"33","author":"Delarue Arthur","year":"2020","unstructured":"Arthur Delarue, Ross Anderson, and Christian Tjandraatmadja. 2020. Reinforcement learning with combinatorial actions: An application to vehicle routing. Advances in Neural Information Processing Systems 33 (2020), 609\u2013620.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","first-page":"13303","article-title":"Provably efficient model-free constrained rl with linear function approximation","volume":"35","author":"Ghosh Arnob","year":"2022","unstructured":"Arnob Ghosh, Xingyu Zhou, and Ness Shroff. 2022. 
Provably efficient model-free constrained rl with linear function approximation. Advances in Neural Information Processing Systems 35 (2022), 13303\u201313315.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Machine Learning. PMLR, 4074\u20134084","author":"Hao Botao","year":"2021","unstructured":"Botao Hao, Xiang Ji, Yaqi Duan, Hao Lu, Csaba Szepesvari, and Mengdi Wang. 2021. Bootstrapping fitted q-evaluation for off-policy inference. In International Conference on Machine Learning. PMLR, 4074\u20134084."},{"key":"e_1_3_2_1_7_1","volume-title":"Ranked reward: Enabling self-play reinforcement learning for combinatorial optimization. arXiv preprint arXiv:1807.01672","author":"Laterre Alexandre","year":"2018","unstructured":"Alexandre Laterre, Yunguan Fu, Mohamed\u00a0Khalil Jabri, Alain-Sam Cohen, David Kas, Karl Hajjar, Torbjorn\u00a0S Dahl, Amine Kerkeni, and Karim Beguir. 2018. Ranked reward: Enabling self-play reinforcement learning for combinatorial optimization. arXiv preprint arXiv:1807.01672 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Machine Learning. PMLR, 3703\u20133712","author":"Le Hoang","year":"2019","unstructured":"Hoang Le, Cameron Voloshin, and Yisong Yue. 2019. Batch policy learning under constraints. In International Conference on Machine Learning. PMLR, 3703\u20133712."},{"volume-title":"The 30th international joint conference on artificial intelligence (ijcai).","author":"Liu Yongshuai","unstructured":"Yongshuai Liu, Avishai Halev, and Xin Liu. 2021. Policy learning with constraints in model-free reinforcement learning: A survey. In The 30th international joint conference on artificial intelligence (ijcai).","key":"e_1_3_2_1_9_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_10_1","DOI":"10.1016\/j.cor.2021.105400"},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Machine Learning. 
PMLR, 9797\u20139806","author":"Wachi Akifumi","year":"2020","unstructured":"Akifumi Wachi and Yanan Sui. 2020. Safe reinforcement learning in constrained markov decision processes. In International Conference on Machine Learning. PMLR, 9797\u20139806."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.1109\/AIC57670.2023.10263956"}],"event":{"sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"acronym":"RecSys '24","name":"RecSys '24: 18th ACM Conference on Recommender Systems","location":"Bari Italy"},"container-title":["18th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3691698","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640457.3691698","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:33Z","timestamp":1750294713000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3691698"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,8]]},"references-count":12,"alternative-id":["10.1145\/3640457.3691698","10.1145\/3640457"],"URL":"https:\/\/doi.org\/10.1145\/3640457.3691698","relation":{},"subject":[],"published":{"date-parts":[[2024,10,8]]},"assertion":[{"value":"2024-10-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}