{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:10:16Z","timestamp":1765545016315,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":93,"publisher":"ACM","funder":[{"name":"CCF-Alimama Tech Kangaroo Fund","award":["No. 2024002"],"award-info":[{"award-number":["No. 2024002"]}]},{"name":"Research Impact Fund","award":["No.R1015-23"],"award-info":[{"award-number":["No.R1015-23"]}]},{"name":"Kuaishou"},{"name":"Collaborative Research Fund","award":["No.C1043-24GF"],"award-info":[{"award-number":["No.C1043-24GF"]}]},{"name":"Huawei Innovation Research Program, Huawei Fellowship"},{"name":"CCF-Tencent Open Fund, Tencent Rhino-Bird Focused Research Program"},{"name":"CCF-Ant Research Fund"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737250","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:04:26Z","timestamp":1754255066000},"page":"4635-4646","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Multi-task Offline Reinforcement Learning for Online Advertising in Recommender Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1995-3381","authenticated-orcid":false,"given":"Langming","family":"Liu","sequence":"first","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Hangzhou, China and City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5976-0707","authenticated-orcid":false,"given":"Wanyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology, Shenzhen, China and City University of Hong Kong, Hong Kong, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3524-6874","authenticated-orcid":false,"given":"Chi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Harbin Engineering University, Harbin, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2693-9426","authenticated-orcid":false,"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1395-261X","authenticated-orcid":false,"given":"Hongzhi","family":"Yin","sequence":"additional","affiliation":[{"name":"University of Queensland, Brisbane, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4450-2251","authenticated-orcid":false,"given":"Xuetao","family":"Wei","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3800-7543","authenticated-orcid":false,"given":"Wenbo","family":"Su","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4037-6315","authenticated-orcid":false,"given":"Bo","family":"Zheng","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2926-4416","authenticated-orcid":false,"given":"Xiangyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Himan Abdollahpouri Masoud Mansoury Robin Burke and Bamshad Mobasher. 2019. The unfairness of popularity bias in recommendation. arXiv preprint arXiv:1907.13286(2019)."},{"key":"e_1_3_2_1_2_1","volume-title":"International conference on machine learning. PMLR, 22-31","author":"Achiam Joshua","year":"2017","unstructured":"Joshua Achiam, David Held, Aviv Tamar, and Pieter Abbeel. 
2017. Constrained policy optimization. In International conference on machine learning. PMLR, 22-31."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543846"},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Machine Learning. PMLR, 104-114","author":"Agarwal Rishabh","year":"2020","unstructured":"Rishabh Agarwal, Dale Schuurmans, and Mohammad Norouzi. 2020. An optimistic perspective on offline reinforcement learning. In International Conference on Machine Learning. PMLR, 104-114."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2187836.2187888"},{"volume-title":"Constrained Markov decision processes","author":"Altman Eitan","key":"e_1_3_2_1_6_1","unstructured":"Eitan Altman. 1999. Constrained Markov decision processes. Vol. 7. CRC press."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450074"},{"key":"e_1_3_2_1_9_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer normalization. arXiv preprint arXiv:1607.06450(2016)."},{"key":"e_1_3_2_1_10_1","unstructured":"Shaojie Bai J Zico Kolter and Vladlen Koltun. 2018. An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271(2018)."},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Machine Learning. PMLR, 613-628","author":"Balseiro Santiago","year":"2020","unstructured":"Santiago Balseiro, Haihao Lu, and Vahab Mirrokni. 2020. Dual mirror descent for online allocation problems. In International Conference on Machine Learning. 
PMLR, 613-628."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583259"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570486"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3546788"},{"key":"e_1_3_2_1_15_1","volume-title":"Decision transformer: Reinforcement learning via sequence modeling. Advances in neural information processing systems","author":"Chen Lili","year":"2021","unstructured":"Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Misha Laskin, Pieter Abbeel, Aravind Srinivas, and Igor Mordatch. 2021a. Decision transformer: Reinforcement learning via sequence modeling. Advances in neural information processing systems, Vol. 34 (2021), 15084-15097."},{"key":"e_1_3_2_1_16_1","unstructured":"Xiaocong Chen Lina Yao Julian McAuley Guanglin Zhou and Xianzhi Wang. 2021b. A survey of deep reinforcement learning in recommender systems: A systematic review and future directions. arXiv preprint arXiv:2109.03540(2021)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_1_18_1","unstructured":"Yinlam Chow Ofir Nachum Aleksandra Faust Edgar Duenez-Guzman and Mohammad Ghavamzadeh. 2019. Lyapunov-based safe policy optimization for continuous control. arXiv preprint arXiv:1901.10031(2019)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/2882612.2882613"},{"key":"e_1_3_2_1_20_1","volume-title":"International journal of research in marketing","author":"Haan Evert De","year":"2016","unstructured":"Evert De Haan, Thorsten Wiesel, and Koen Pauwels. 2016. The effectiveness of different forms of online advertising for purchase conversion in a multiple-channel attribution framework. International journal of research in marketing, Vol. 
33, 3 (2016), 491-507."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.2753\/MIS0742-1222240305"},{"key":"e_1_3_2_1_22_1","volume-title":"International conference on machine learning. PMLR, 49-58","author":"Finn Chelsea","year":"2016","unstructured":"Chelsea Finn, Sergey Levine, and Pieter Abbeel. 2016. Guided cost learning: Deep inverse optimal control via policy optimization. In International conference on machine learning. PMLR, 49-58."},{"key":"e_1_3_2_1_23_1","volume-title":"International conference on machine learning. PMLR","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In International conference on machine learning. PMLR, 2052-2062."},{"key":"e_1_3_2_1_24_1","unstructured":"Jingtong Gao Yewen Li Shuai Mao Peng Jiang Nan Jiang Yejing Wang Qingpeng Cai Fei Pan Kun Gai Bo An et al. 2025. Generative Auto-Bidding with Value-Guided Explorations. arXiv preprint arXiv:2504.14587(2025)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441824"},{"key":"e_1_3_2_1_26_1","unstructured":"Qi Hao Tianze Luo and Guangda Huzhang. 2021. Re-ranking with constraints on diversified exposures for homepage recommender system. arXiv preprint arXiv:2112.07621(2021)."},{"key":"e_1_3_2_1_27_1","volume-title":"International Conference on Machine Learning. PMLR, 4060-4070","author":"Hao Xiaotian","year":"2020","unstructured":"Xiaotian Hao, Zhaoqing Peng, Yi Ma, Guan Wang, Junqi Jin, Jianye Hao, Shan Chen, Rongquan Bai, Mingzhou Xie, Miao Xu, et al., 2020. Dynamic knapsack optimization towards efficient multi-channel sequential advertising. In International Conference on Machine Learning. PMLR, 4060-4070."},{"key":"e_1_3_2_1_28_1","first-page":"157","volume-title":"Foundations and Trends\u00ae in Optimization","volume":"2","author":"Hazan Elad","year":"2016","unstructured":"Elad Hazan et al., 2016. 
Introduction to online convex optimization. Foundations and Trends\u00ae in Optimization, Vol. 2, 3-4 (2016), 157-325."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"e_1_3_2_1_31_1","volume-title":"Generative adversarial imitation learning. Advances in neural information processing systems","author":"Ho Jonathan","year":"2016","unstructured":"Jonathan Ho and Stefano Ermon. 2016. Generative adversarial imitation learning. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10737"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.5555\/1622737.1622748"},{"key":"e_1_3_2_1_35_1","volume-title":"International Conference on Machine Learning. PMLR, 5562-5571","author":"Kim Hyunjik","year":"2021","unstructured":"Hyunjik Kim, George Papamakarios, and Andriy Mnih. 2021. The lipschitz constant of self-attention. In International Conference on Machine Learning. PMLR, 5562-5571."},{"key":"e_1_3_2_1_36_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980(2014).","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980(2014)."},{"key":"e_1_3_2_1_37_1","unstructured":"Simon Kingsnorth. 2022. Digital marketing strategy: an integrated approach to online marketing. Kogan Page Publishers."},{"key":"e_1_3_2_1_38_1","unstructured":"Haruka Kiyohara Kosuke Kawakami and Yuta Saito. 2021. Accelerating offline reinforcement learning application in real-time bidding and recommendation: Potential use of simulation. 
arXiv preprint arXiv:2109.08331(2021)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671555"},{"key":"e_1_3_2_1_40_1","unstructured":"Ilya Kostrikov Ashvin Nair and Sergey Levine. 2021. Offline reinforcement learning with implicit q-learning. arXiv preprint arXiv:2110.06169(2021)."},{"key":"e_1_3_2_1_41_1","unstructured":"Yueh-Ning Ku Mikhail Kuznetsov Shaunak Mishra and Paloma de Juan. 2023. Staging e-commerce products for online advertising using retrieval assisted image generation. arXiv preprint arXiv:2307.15326(2023)."},{"key":"e_1_3_2_1_42_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Kumar Aviral","year":"2019","unstructured":"Aviral Kumar, Justin Fu, Matthew Soh, George Tucker, and Sergey Levine. 2019. Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_43_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020b. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 1179-1191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDMW51313.2020.00020"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 18th ACM SIGKDD international conference on Knowledge discovery and data mining. 768-776","author":"Orten Burkay","year":"2012","unstructured":"Kuang-chih Lee, Burkay Orten, Ali Dasdan, and Wentong Li. 2012. Estimating conversion rate in display advertising from past performance data. In Proceedings of the 18th ACM SIGKDD international conference on Knowledge discovery and data mining. 
768-776."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615137"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512109"},{"key":"e_1_3_2_1_48_1","volume-title":"Facing the cold start problem in recommender systems. Expert systems with applications","author":"Lika Blerina","year":"2014","unstructured":"Blerina Lika, Kostas Kolomvatsos, and Stathes Hadjiefthymiades. 2014. Facing the cold start problem in recommender systems. Expert systems with applications, Vol. 41, 4 (2014), 2065-2073."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583339"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591717"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Tao Liu Qi Xu Wei Shi Zhigang Hua and Shuang Yang. 2025. Session-Level Dynamic Ad Load Optimization using Offline Robust Reinforcement Learning. arXiv preprint arXiv:2501.05591(2025).","DOI":"10.1145\/3690624.3709437"},{"key":"e_1_3_2_1_52_1","unstructured":"Weiwen Liu Yunjia Xi Jiarui Qin Fei Sun Bo Chen Weinan Zhang Rui Zhang and Ruiming Tang. 2022. Neural re-ranking in multi-stage recommender systems: A review. arXiv preprint arXiv:2202.06602(2022)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/614"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671531"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583467"},{"key":"e_1_3_2_1_56_1","volume-title":"Proc. icml","volume":"30","author":"Maas Andrew L","year":"2013","unstructured":"Andrew L Maas, Awni Y Hannun, Andrew Y Ng, et al., 2013. Rectifier nonlinearities improve neural network acoustic models. In Proc. icml, Vol. 30. 
Atlanta, GA, 3."},{"key":"e_1_3_2_1_57_1","first-page":"3","article-title":"Near-optimum online ad allocation for targeted advertising","volume":"6","author":"Naor Joseph","year":"2018","unstructured":"Joseph Naor and David Wajc. 2018. Near-optimum online ad allocation for targeted advertising. ACM Transactions on Economics and Computation (TEAC), Vol. 6, 3-4 (2018), 1-20.","journal-title":"ACM Transactions on Economics and Computation (TEAC)"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330783"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462836"},{"key":"e_1_3_2_1_60_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_61_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Rafailov Rafael","year":"2024","unstructured":"Rafael Rafailov, Archit Sharma, Eric Mitchell, Christopher D Manning, Stefano Ermon, and Chelsea Finn. 2024. Direct preference optimization: Your language model is secretly a reward model. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271677"},{"key":"e_1_3_2_1_63_1","volume-title":"Weight normalization: A simple reparameterization to accelerate training of deep neural networks. Advances in neural information processing systems","author":"Salimans Tim","year":"2016","unstructured":"Tim Salimans and Durk P Kingma. 2016. Weight normalization: A simple reparameterization to accelerate training of deep neural networks. Advances in neural information processing systems, Vol. 
29 (2016)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3481941"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/2339530.2339730"},{"key":"e_1_3_2_1_67_1","unstructured":"Chen Tessler Daniel J Mankowitz and Shie Mannor. 2018. Reward constrained policy optimization. arXiv preprint arXiv:1805.11074(2018)."},{"key":"e_1_3_2_1_68_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591648"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591750"},{"key":"e_1_3_2_1_71_1","volume-title":"Reinforcement learning. Adaptation, learning, and optimization","author":"Wiering Marco A","year":"2012","unstructured":"Marco A Wiering and Martijn Van Otterlo. 2012. Reinforcement learning. Adaptation, learning, and optimization, Vol. 
12, 3 (2012), 729."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358031"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599868"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539108"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.2016.2451"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i9.26317"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401467"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.5555\/2805234.2805235"},{"key":"e_1_3_2_1_79_1","first-page":"20410","article-title":"BCORLE (\u0142ambda): An Offline Reinforcement Learning and Evaluation Framework for Coupons Allocation in E-commerce Market","volume":"34","author":"Zhang Yang","year":"2021","unstructured":"Yang Zhang, Bo Tang, Qingyu Yang, Dou An, Hongyin Tang, Chenyang Xi, Xueying Li, and Feiyu Xiong. 2021. BCORLE (\u0142ambda): An Offline Reinforcement Learning and Evaluation Framework for Coupons Allocation in E-commerce Market. Advances in Neural Information Processing Systems, Vol. 34 (2021), 20410-20422.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219918"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583418"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/3533274.3533277"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16156"},{"key":"e_1_3_2_1_84_1","volume-title":"Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletter","author":"Zhao Xiangyu","year":"2019","unstructured":"Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin. 2019. 
\"Deep reinforcement learning for search, recommendation, and online advertising: a survey\" by Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletter, Vol. 2019, Spring (2019), 1-15."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412044"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450125"},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_1_89_1","unstructured":"Xiangyu Zhao Liang Zhang Long Xia Zhuoye Ding Dawei Yin and Jiliang Tang. 2017. Deep reinforcement learning for list-wise recommendations. arXiv preprint arXiv:1801.00209(2017)."},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403384"},{"key":"e_1_3_2_1_91_1","volume-title":"international conference on machine learning. PMLR, 27042-27059","author":"Zheng Qinqing","year":"2022","unstructured":"Qinqing Zheng, Amy Zhang, and Aditya Grover. 2022. Online decision transformer. In international conference on machine learning. 
PMLR, 27042-27059."},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"},{"key":"e_1_3_2_1_93_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i4.25677"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737250","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T14:46:44Z","timestamp":1755355604000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737250"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":93,"alternative-id":["10.1145\/3711896.3737250","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737250","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}