{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:08:15Z","timestamp":1775815695930,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","funder":[{"name":"Kuaishou"},{"name":"Research Impact Fund","award":["No.R1015-23"],"award-info":[{"award-number":["No.R1015-23"]}]},{"name":"Collaborative Research Fund","award":["No.C1043-24GF"],"award-info":[{"award-number":["No.C1043-24GF"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3729987","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T14:55:26Z","timestamp":1752504926000},"page":"244-254","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Generative Auto-Bidding with Value-Guided Explorations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4470-5972","authenticated-orcid":false,"given":"Jingtong","family":"Gao","sequence":"first","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0073-123X","authenticated-orcid":false,"given":"Yewen","family":"Li","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8430-5871","authenticated-orcid":false,"given":"Shuai","family":"Mao","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8133-5010","authenticated-orcid":false,"given":"Peng","family":"Jiang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1573-1487","authenticated-orcid":false,"given":"Nan","family":"Jiang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2852-9910","authenticated-orcid":false,"given":"Yejing","family":"Wang","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6451-9299","authenticated-orcid":false,"given":"Qingpeng","family":"Cai","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1739-0868","authenticated-orcid":false,"given":"Fei","family":"Pan","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9266-0780","authenticated-orcid":false,"given":"Peng","family":"Jiang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3636-3618","authenticated-orcid":false,"given":"Kun","family":"Gai","sequence":"additional","affiliation":[{"name":"Unaffiliated, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7064-7438","authenticated-orcid":false,"given":"Bo","family":"An","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2926-4416","authenticated-orcid":false,"given":"Xiangyu","family":"Zhao","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Gagan Aggarwal Ashwinkumar Badanidiyuru Santiago R Balseiro Kshipra Bhawalkar Yuan Deng Zhe Feng Gagan Goel Christopher Liaw Haihao Lu Mohammad Mahdian et al. 2024. Auto-bidding and auctions in online advertising: A survey. ACM SIGecom Exchanges (2024) 159-183.","DOI":"10.1145\/3699824.3699838"},{"key":"e_1_3_2_1_2_1","first-page":"241","article-title":"General auction mechanism for search advertising","author":"Aggarwal Gagan","year":"2009","unstructured":"Gagan Aggarwal, Shan Muthukrishnan, D\u00e1vid P\u00e1l, and Martin P\u00e1l. 2009. General auction mechanism for search advertising. In Proc. of WWW. 241-250.","journal-title":"Proc. of WWW."},{"key":"e_1_3_2_1_3_1","volume-title":"Real-time bid optimization for group-buying ads. ACM Transactions on Intelligent Systems and Technology (TIST)","author":"Balakrishnan Raju","year":"2014","unstructured":"Raju Balakrishnan and Rushi P Bhatt. 2014. Real-time bid optimization for group-buying ads. ACM Transactions on Intelligent Systems and Technology (TIST) (2014), 1-21."},{"key":"e_1_3_2_1_4_1","volume-title":"Automated bidding in computational markets: an application in market-based allocation of computing services. Autonomous Agents and Multi-Agent Systems","author":"Borissov Nikolay","year":"2010","unstructured":"Nikolay Borissov, Dirk Neumann, and Christof Weinhardt. 2010. Automated bidding in computational markets: an application in market-based allocation of computing services. Autonomous Agents and Multi-Agent Systems (2010), 115-142."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018702"},{"key":"e_1_3_2_1_6_1","first-page":"15084","volume-title":"Proc. of NeurIPS","author":"Chen Lili","year":"2021","unstructured":"Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Misha Laskin, Pieter Abbeel, Aravind Srinivas, and Igor Mordatch. 2021. Decision transformer: Reinforcement learning via sequence modeling. Proc. of NeurIPS (2021), 15084-15097."},{"key":"e_1_3_2_1_7_1","first-page":"1307","article-title":"Real-time bidding algorithms for performance-based display ad allocation","author":"Chen Ye","year":"2011","unstructured":"Ye Chen, Pavel Berkhin, Bo Anderson, and Nikhil R Devanur. 2011. Real-time bidding algorithms for performance-based display ad allocation. In Proc. of KDD. 1307-1315.","journal-title":"Proc. of KDD."},{"key":"e_1_3_2_1_8_1","volume-title":"Internet advertising and the generalized second-price auction: Selling billions of dollars worth of keywords. American economic review","author":"Edelman Benjamin","year":"2007","unstructured":"Benjamin Edelman, Michael Ostrovsky, and Michael Schwarz. 2007. Internet advertising and the generalized second-price auction: Selling billions of dollars worth of keywords. American economic review (2007), 242-259."},{"key":"e_1_3_2_1_9_1","volume-title":"Proc. of ICML. 2052-2062","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In Proc. of ICML. 2052-2062."},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. of WWW.","author":"Gao Jingtong","unstructured":"Jingtong Gao, Bo Chen, Xiangyu Zhao, Weiwen Liu, Xiangyang Li, Yichao Wang, Wanyu Wang, Huifeng Guo, and Ruiming Tang. [n.d.]. LLM4Rerank: LLM-based Auto-Reranking Framework for Recommendations. In Proc. of WWW."},{"key":"e_1_3_2_1_11_1","first-page":"653","article-title":"HierRec","author":"Gao Jingtong","year":"2024","unstructured":"Jingtong Gao, Bo Chen, Menghui Zhu, Xiangyu Zhao, Xiaopeng Li, Yuhao Wang, Yichao Wang, Huifeng Guo, and Ruiming Tang. 2024a. HierRec: Scenario-Aware Hierarchical Modeling for Multi-scenario Recommendations. In Proc. of CIKM. 653-662.","journal-title":"Scenario-Aware Hierarchical Modeling for Multi-scenario Recommendations. In Proc. of CIKM."},{"key":"e_1_3_2_1_12_1","volume-title":"SampleLLM: Optimizing Tabular Data Synthesis in Recommendations. arXiv preprint arXiv:2501.16125","author":"Gao Jingtong","year":"2025","unstructured":"Jingtong Gao, Zhaocheng Du, Xiaopeng Li, Yichao Wang, Xiangyang Li, Huifeng Guo, Ruiming Tang, and Xiangyu Zhao. 2025. SampleLLM: Optimizing Tabular Data Synthesis in Recommendations. arXiv preprint arXiv:2501.16125 (2025)."},{"key":"e_1_3_2_1_13_1","first-page":"1478","article-title":"AutoTransfer: Instance transfer for cross-domain recommendations","author":"Gao Jingtong","year":"2023","unstructured":"Jingtong Gao, Xiangyu Zhao, Bo Chen, Fan Yan, Huifeng Guo, and Ruiming Tang. 2023. AutoTransfer: Instance transfer for cross-domain recommendations. In Proc. of SIGIR. 1478-1487.","journal-title":"Proc. of SIGIR."},{"key":"e_1_3_2_1_14_1","volume-title":"SMLP4Rec: an Efficient all-MLP architecture for sequential recommendations. ACM Transactions on Information Systems","author":"Gao Jingtong","year":"2024","unstructured":"Jingtong Gao, Xiangyu Zhao, Muyang Li, Minghao Zhao, Runze Wu, Ruocheng Guo, Yiding Liu, and Dawei Yin. 2024b. SMLP4Rec: an Efficient all-MLP architecture for sequential recommendations. ACM Transactions on Information Systems (2024), 1-23."},{"key":"e_1_3_2_1_15_1","volume-title":"Reinforcement learning enabled dynamic bidding strategy for instant delivery trading. Computers & Industrial Engineering","author":"Guo Chaojie","year":"2021","unstructured":"Chaojie Guo, Russell G Thompson, Greg Foliente, and Xiaoshuai Peng. 2021. Reinforcement learning enabled dynamic bidding strategy for instant delivery trading. Computers & Industrial Engineering (2021), 107596."},{"key":"e_1_3_2_1_16_1","first-page":"5038","article-title":"AIGB: Generative Auto-bidding via Conditional Diffusion Modeling","author":"Guo Jiayan","year":"2024","unstructured":"Jiayan Guo, Yusen Huo, Zhilin Zhang, Tianyu Wang, Chuan Yu, Jian Xu, Bo Zheng, and Yan Zhang. 2024. AIGB: Generative Auto-bidding via Conditional Diffusion Modeling. In Proc. of KDD. 5038-5049.","journal-title":"Proc. of KDD."},{"key":"e_1_3_2_1_17_1","first-page":"2993","article-title":"A unified solution to constrained bidding in online display advertising","author":"He Yue","year":"2021","unstructured":"Yue He, Xiujun Chen, Di Wu, Junwei Pan, Qing Tan, Chuan Yu, Jian Xu, and Xiaoqiang Zhu. 2021. A unified solution to constrained bidding in online display advertising. In Proc. of KDD. 2993-3001.","journal-title":"Proc. of KDD."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2024.04.191"},{"key":"e_1_3_2_1_19_1","volume-title":"Accelerating offline reinforcement learning application in real-time bidding and recommendation: Potential use of simulation. arXiv preprint arXiv:2109.08331","author":"Kiyohara Haruka","year":"2021","unstructured":"Haruka Kiyohara, Kosuke Kawakami, and Yuta Saito. 2021. Accelerating offline reinforcement learning application in real-time bidding and recommendation: Potential use of simulation. arXiv preprint arXiv:2109.08331 (2021)."},{"key":"e_1_3_2_1_20_1","first-page":"5774","article-title":"Offline reinforcement learning with fisher divergence critic regularization","author":"Kostrikov Ilya","year":"2021","unstructured":"Ilya Kostrikov, Rob Fergus, Jonathan Tompson, and Ofir Nachum. 2021. Offline reinforcement learning with fisher divergence critic regularization. In Proc. of ICML. 5774-5783.","journal-title":"Proc. of ICML."},{"key":"e_1_3_2_1_21_1","volume-title":"Proc. of ICLR.","author":"Kostrikov Ilya","unstructured":"Ilya Kostrikov, Ashvin Nair, and Sergey Levine. [n.d.]. Offline Reinforcement Learning with Implicit Q-Learning. In Proc. of ICLR."},{"key":"e_1_3_2_1_22_1","first-page":"1179","volume-title":"Proc. of NeurIPS","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Proc. of NeurIPS (2020), 1179-1191."},{"key":"e_1_3_2_1_23_1","volume-title":"Large language models (LLMs): survey, technical frameworks, and future challenges. Artificial Intelligence Review","author":"Kumar Pranjal","year":"2024","unstructured":"Pranjal Kumar. 2024. Large language models (LLMs): survey, technical frameworks, and future challenges. Artificial Intelligence Review (2024), 260."},{"key":"e_1_3_2_1_24_1","first-page":"101","article-title":"STRec: Sparse transformer for sequential recommendations","author":"Li Chengxi","year":"2023","unstructured":"Chengxi Li, Yejing Wang, Qidong Liu, Xiangyu Zhao, Wanyu Wang, Yiqi Wang, Lixin Zou, Wenqi Fan, and Qing Li. 2023a. STRec: Sparse transformer for sequential recommendations. In Proc. of RecSys. 101-111.","journal-title":"Proc. of RecSys."},{"key":"e_1_3_2_1_25_1","first-page":"1199","article-title":"Gromov-wasserstein guided representation learning for cross-domain recommendation","author":"Li Xinhang","year":"2022","unstructured":"Xinhang Li, Zhaopeng Qiu, Xiangyu Zhao, Zihao Wang, Yong Zhang, Chunxiao Xing, and Xian Wu. 2022. Gromov-wasserstein guided representation learning for cross-domain recommendation. In Proc. of CIKM. 1199-1208.","journal-title":"Proc. of CIKM."},{"key":"e_1_3_2_1_26_1","first-page":"1268","article-title":"Hamur: Hyper adapter for multi-domain recommendation","author":"Li Xiaopeng","year":"2023","unstructured":"Xiaopeng Li, Fan Yan, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang. 2023b. Hamur: Hyper adapter for multi-domain recommendation. In Proc. of CIKM. 1268-1277.","journal-title":"Proc. of CIKM."},{"key":"e_1_3_2_1_27_1","volume-title":"GAS: Generative Auto-bidding with Post-training Search. arXiv preprint arXiv:2412.17018","author":"Li Yewen","year":"2024","unstructured":"Yewen Li, Shuai Mao, Jingtong Gao, Nan Jiang, Yunjian Xu, Qingpeng Cai, Fei Pan, Peng Jiang, and Bo An. 2024. GAS: Generative Auto-bidding with Post-training Search. arXiv preprint arXiv:2412.17018 (2024)."},{"key":"e_1_3_2_1_28_1","volume-title":"A survey of transformers. AI open","author":"Lin Tianyang","year":"2022","unstructured":"Tianyang Lin, Yuxin Wang, Xiangyang Liu, and Xipeng Qiu. 2022. A survey of transformers. AI open (2022), 111-132."},{"key":"e_1_3_2_1_29_1","first-page":"1003","article-title":"Autodenoise: Automatic data instance denoising for recommendations","author":"Lin Weilin","year":"2023","unstructured":"Weilin Lin, Xiangyu Zhao, Yejing Wang, Yuanshao Zhu, and Wanyu Wang. 2023. Autodenoise: Automatic data instance denoising for recommendations. In Proc. of WWW. 1003-1011.","journal-title":"Proc. of WWW."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512078"},{"key":"e_1_3_2_1_31_1","first-page":"2307","article-title":"Automated embedding size search in deep recommender systems","author":"Liu Haochen","year":"2020","unstructured":"Haochen Liu, Xiangyu Zhao, Chong Wang, Xiaobing Liu, and Jiliang Tang. 2020. Automated embedding size search in deep recommender systems. In Proc. of SIGIR. 2307-2316.","journal-title":"Proc. of SIGIR."},{"key":"e_1_3_2_1_32_1","first-page":"833","article-title":"Exploration and regularization of the latent action space in recommendation","author":"Liu Shuchang","year":"2023","unstructured":"Shuchang Liu, Qingpeng Cai, Bowen Sun, Yuhao Wang, Ji Jiang, Dong Zheng, Peng Jiang, Kun Gai, Xiangyu Zhao, and Yongfeng Zhang. 2023a. Exploration and regularization of the latent action space in recommendation. In Proc. of WWW. 833-844.","journal-title":"Proc. of WWW."},{"key":"e_1_3_2_1_33_1","first-page":"21611","article-title":"Constrained decision transformer for offline safe reinforcement learning","author":"Liu Zuxin","year":"2023","unstructured":"Zuxin Liu, Zijian Guo, Yihang Yao, Zhepeng Cen, Wenhao Yu, Tingnan Zhang, and Ding Zhao. 2023b. Constrained decision transformer for offline safe reinforcement learning. In Proc. of ICML. 21611-21630.","journal-title":"Proc. of ICML."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657829"},{"key":"e_1_3_2_1_35_1","first-page":"1273","article-title":"Multi-task recommendations with reinforcement learning","author":"Liu Ziru","year":"2023","unstructured":"Ziru Liu, Jiejie Tian, Qingpeng Cai, Xiangyu Zhao, Jingtong Gao, Shuchang Liu, Dayou Chen, Tonghao He, Dong Zheng, Peng Jiang, et al., 2023c. Multi-task recommendations with reinforcement learning. In Proc. of WWW. 1273-1282.","journal-title":"Proc. of WWW."},{"key":"e_1_3_2_1_36_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov I","year":"2017","unstructured":"I Loshchilov. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_1_37_1","volume-title":"BOXES: An experiment in adaptive control. Machine intelligence","author":"Michie Donald","year":"1968","unstructured":"Donald Michie and Roger A Chambers. 1968. BOXES: An experiment in adaptive control. Machine intelligence (1968), 137-152."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/JRPROC.1961.287775"},{"key":"e_1_3_2_1_39_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_40_1","first-page":"2651","volume-title":"Proc. of NeurIPS","author":"Mou Zhiyu","year":"2022","unstructured":"Zhiyu Mou, Yusen Huo, Rongquan Bai, Mingzhou Xie, Chuan Yu, Jian Xu, and Bo Zheng. 2022. Sustainable online reinforcement learning for auto-bidding. Proc. of NeurIPS (2022), 2651-2663."},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. of NeurIPS","author":"Nachum Ofir","year":"2017","unstructured":"Ofir Nachum, Mohammad Norouzi, Kelvin Xu, and Dale Schuurmans. 2017. Bridging the gap between value and policy based reinforcement learning. Proc. of NeurIPS (2017)."},{"key":"e_1_3_2_1_42_1","volume-title":"Real-time bidding in online display advertising. Marketing Science","author":"Sayedi Amin","year":"2018","unstructured":"Amin Sayedi. 2018. Real-time bidding in online display advertising. Marketing Science (2018), 553-568."},{"key":"e_1_3_2_1_43_1","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"Silver David","year":"2014","unstructured":"David Silver, Guy Lever, Nicolas Heess, Thomas Degris, Daan Wierstra, and Martin Riedmiller. 2014. Deterministic policy gradient algorithms. In Proc. of ICML. 387-395.","journal-title":"Proc. of ICML."},{"key":"e_1_3_2_1_44_1","volume-title":"Proc. of NeurIPS.","author":"Su Kefan","year":"2024","unstructured":"Kefan Su, Yusen Huo, Zhilin Zhang, Shuai Dou, Chuan Yu, Jian Xu, Zongqing Lu, and Bo Zheng. 2024. AuctionNet: A Novel Benchmark for Decision-Making in Large-Scale Games. In Proc. of NeurIPS."},{"key":"e_1_3_2_1_45_1","volume-title":"Proc. of NeurIPS","author":"Sutton Richard S","year":"1999","unstructured":"Richard S Sutton, David McAllester, Satinder Singh, and Yishay Mansour. 1999. Policy gradient methods for reinforcement learning with function approximation. Proc. of NeurIPS (1999)."},{"key":"e_1_3_2_1_46_1","volume-title":"A unified analysis of value-function-based reinforcement-learning algorithms. Neural computation","author":"Szepesv\u00e1ri Csaba","year":"1999","unstructured":"Csaba Szepesv\u00e1ri and Michael L Littman. 1999. A unified analysis of value-function-based reinforcement-learning algorithms. Neural computation (1999), 2017-2060."},{"key":"e_1_3_2_1_47_1","volume-title":"Deep Reinforcement Learning: A Survey","author":"Wang X","year":"2022","unstructured":"X Wang, S Wang, X Liang, D Zhao, J Huang, X Xu, B Dai, and Q Miao. 2022. Deep Reinforcement Learning: A Survey. IEEE Transactions on Neural Networks and Learning Systems (2022)."},{"key":"e_1_3_2_1_48_1","volume-title":"Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang.","author":"Wang Yuhao","year":"2023","unstructured":"Yuhao Wang, Ha Tsz Lam, Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang. 2023a. Multi-task deep recommender systems: A survey. arXiv preprint arXiv:2302.03525 (2023)."},{"key":"e_1_3_2_1_49_1","first-page":"1498","article-title":"PLATE: A prompt-enhanced paradigm for multi-scenario recommendations","author":"Wang Yuhao","year":"2023","unstructured":"Yuhao Wang, Xiangyu Zhao, Bo Chen, Qidong Liu, Huifeng Guo, Huanshuo Liu, Yichao Wang, Rui Zhang, and Ruiming Tang. 2023b. PLATE: A prompt-enhanced paradigm for multi-scenario recommendations. In Proc. of SIGIR. 1498-1507.","journal-title":"Proc. of SIGIR."},{"key":"e_1_3_2_1_50_1","first-page":"1129","article-title":"A cooperative-competitive multi-agent framework for auto-bidding in online advertising","author":"Wen Chao","year":"2022","unstructured":"Chao Wen, Miao Xu, Zhilin Zhang, Zhenzhe Zheng, Yuhui Wang, Xiangyu Liu, Yu Rong, Dong Xie, Xiaoyang Tan, Chuan Yu, et al., 2022. A cooperative-competitive multi-agent framework for auto-bidding in online advertising. In Proc. of WSDM. 1129-1139.","journal-title":"Proc. of WSDM."},{"key":"e_1_3_2_1_51_1","first-page":"1443","article-title":"Budget constrained bidding by model-free reinforcement learning in display advertising","author":"Wu Di","year":"2018","unstructured":"Di Wu, Xiujun Chen, Xun Yang, Hao Wang, Qing Tan, Xiaoxun Zhang, Jian Xu, and Kun Gai. 2018. Budget constrained bidding by model-free reinforcement learning in display advertising. In Proc. of CIKM. 1443-1451.","journal-title":"Proc. of CIKM."},{"key":"e_1_3_2_1_52_1","volume-title":"Proc. of NeurIPS","author":"Wu Yueh-Hua","year":"2024","unstructured":"Yueh-Hua Wu, Xiaolong Wang, and Masashi Hamaya. 2024. Elastic decision transformer. Proc. of NeurIPS (2024)."},{"key":"e_1_3_2_1_53_1","volume-title":"Proc. of ICLR.","author":"Xu Haoran","unstructured":"Haoran Xu, Li Jiang, Jianxiong Li, Zhuoran Yang, Zhaoran Wang, Victor Wai Kin Chan, and Xianyuan Zhan. [n.d.]. Offline RL with No OOD Actions: In-Sample Learning via Implicit Value Regularization. In Proc. of ICLR."},{"key":"e_1_3_2_1_54_1","volume-title":"Deep reinforcement learning for strategic bidding in electricity markets","author":"Ye Yujian","year":"2019","unstructured":"Yujian Ye, Dawei Qiu, Mingyang Sun, Dimitrios Papadaskalopoulos, and Goran Strbac. 2019. Deep reinforcement learning for strategic bidding in electricity markets. IEEE Transactions on Smart Grid (2019), 1343-1355."},{"key":"e_1_3_2_1_55_1","volume-title":"Proc. of NeurIPS","author":"Yu Hao","year":"2017","unstructured":"Hao Yu, Michael Neely, and Xiaohan Wei. 2017. Online convex optimization with stochastic constraints. Proc. of NeurIPS (2017)."},{"key":"e_1_3_2_1_56_1","first-page":"3604","article-title":"An actor-critic reinforcement learning model for optimal bidding in online display advertising","author":"Yuan Congde","year":"2022","unstructured":"Congde Yuan, Mengzhuo Guo, Chaoneng Xiang, Shuangyang Wang, Guoqing Song, and Qingpeng Zhang. 2022. An actor-critic reinforcement learning model for optimal bidding in online display advertising. In Proc. of CIKM. 3604-3613.","journal-title":"Proc. of CIKM."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/2501040.2501980"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219918"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i1.16156"},{"key":"e_1_3_2_1_60_1","volume-title":"Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletter","author":"Zhao Xiangyu","year":"2019","unstructured":"Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin. 2019. '' Deep reinforcement learning for search, recommendation, and online advertising: a survey'' by Xiangyu Zhao, Long Xia, Jiliang Tang, and Dawei Yin with Martin Vesely as coordinator. ACM sigweb newsletter (2019), 1-15."},{"key":"e_1_3_2_1_61_1","first-page":"1040","article-title":"Recommendations with negative feedback via pairwise deep reinforcement learning","author":"Zhao Xiangyu","year":"2018","unstructured":"Xiangyu Zhao, Liang Zhang, Zhuoye Ding, Long Xia, Jiliang Tang, and Dawei Yin. 2018b. Recommendations with negative feedback via pairwise deep reinforcement learning. In Proc. of KDD. 1040-1048.","journal-title":"Proc. of KDD."},{"key":"e_1_3_2_1_62_1","first-page":"3319","article-title":"Jointly learning to recommend and advertise","author":"Zhao Xiangyu","year":"2020","unstructured":"Xiangyu Zhao, Xudong Zheng, Xiwang Yang, Xiaobing Liu, and Jiliang Tang. 2020. Jointly learning to recommend and advertise. In Proc. of KDD. 3319-3327.","journal-title":"Proc. of KDD."},{"key":"e_1_3_2_1_63_1","first-page":"27042","article-title":"Online decision transformer","author":"Zheng Qinqing","year":"2022","unstructured":"Qinqing Zheng, Amy Zhang, and Aditya Grover. 2022. Online decision transformer. In Proc. of ICML. 27042-27059.","journal-title":"Proc. of ICML."}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Padua Italy","acronym":"SIGIR '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3729987","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T18:32:39Z","timestamp":1755887559000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3729987"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":63,"alternative-id":["10.1145\/3726302.3729987","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3729987","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}