{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:02:16Z","timestamp":1750309336603,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671555","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:55:12Z","timestamp":1724561712000},"page":"5251-5259","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Offline Reinforcement Learning for Optimizing Production Bidding Policies"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8974-6200","authenticated-orcid":false,"given":"Dmytro","family":"Korenkevych","sequence":"first","affiliation":[{"name":"AI at Meta, Menlo Park, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0550-3377","authenticated-orcid":false,"given":"Frank","family":"Cheng","sequence":"additional","affiliation":[{"name":"AI at Meta, Sunnyvale, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0653-5346","authenticated-orcid":false,"given":"Artsiom","family":"Balakir","sequence":"additional","affiliation":[{"name":"AI at Meta, Menlo Park, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2335-289X","authenticated-orcid":false,"given":"Alex","family":"Nikulkov","sequence":"additional","affiliation":[{"name":"AI at Meta, Bellevue, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3782-2178","authenticated-orcid":false,"given":"Lingnan","family":"Gao","sequence":"additional","affiliation":[{"name":"Meta Platform Inc., Menlo Park, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-9422-9714","authenticated-orcid":false,"given":"Zhihao","family":"Cen","sequence":"additional","affiliation":[{"name":"AI at Meta, Menlo Park, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3875-7844","authenticated-orcid":false,"given":"Zuobing","family":"Xu","sequence":"additional","affiliation":[{"name":"Meta Platform Inc., Menlo Park, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1162-106X","authenticated-orcid":false,"given":"Zheqing","family":"Zhu","sequence":"additional","affiliation":[{"name":"AI at Meta, Bellevue, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Budget optimization for sponsored search: Censored learning in mdps. arXiv preprint arXiv:1210.4847","author":"Amin Kareem","year":"2012","unstructured":"Kareem Amin, Michael Kearns, Peter Key, and Anton Schwaighofer. 2012. Budget optimization for sponsored search: Censored learning in mdps. arXiv preprint arXiv:1210.4847 (2012)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"e_1_3_2_2_3_1","first-page":"2142","article-title":"Incrementality Bidding via Reinforcement Learning under Mixed and Delayed Rewards","volume":"35","author":"Varadaraja Ashwinkumar Badanidiyuru","year":"2022","unstructured":"Ashwinkumar Badanidiyuru Varadaraja, Zhe Feng, Tianxi Li, and Haifeng Xu. 2022. Incrementality Bidding via Reinforcement Learning under Mixed and Delayed Rewards. Advances in Neural Information Processing Systems, Vol. 35 (2022), 2142--2153.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_4_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Balseiro Santiago","year":"2019","unstructured":"Santiago Balseiro, Negin Golrezaei, Mohammad Mahdian, Vahab Mirrokni, and Jon Schneider. 2019. Contextual bandits with cross-learning. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/37.248006"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018702"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2017.03.002"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compchemeng.2022.107760"},{"key":"e_1_3_2_2_9_1","volume-title":"Internet advertising and the generalized second-price auction: Selling billions of dollars worth of keywords. American economic review","author":"Edelman Benjamin","year":"2007","unstructured":"Benjamin Edelman, Michael Ostrovsky, and Michael Schwarz. 2007. Internet advertising and the generalized second-price auction: Selling billions of dollars worth of keywords. American economic review, Vol. 97, 1 (2007), 242--259."},{"key":"e_1_3_2_2_10_1","unstructured":"Benjamin Eysenbach Matthieu Geist Sergey Levine and Ruslan Salakhutdinov. 2023. A Connection between One-Step RL and Critic Regularization in Reinforcement Learning. (2023)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583491"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219166.3219208"},{"key":"e_1_3_2_2_13_1","volume-title":"International conference on machine learning. PMLR","author":"Fujimoto Scott","year":"2019","unstructured":"Scott Fujimoto, David Meger, and Doina Precup. 2019. Off-policy deep reinforcement learning without exploration. In International conference on machine learning. PMLR, 2052--2062."},{"key":"e_1_3_2_2_14_1","unstructured":"Tuomas Haarnoja Aurick Zhou Kristian Hartikainen George Tucker Sehoon Ha Jie Tan Vikash Kumar Henry Zhu Abhishek Gupta Pieter Abbeel et al. 2018. Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467199"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1386790.1386794"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.5555\/645531.656005"},{"key":"e_1_3_2_2_18_1","volume-title":"Conference on Robot Learning. PMLR, 651--673","author":"Kalashnikov Dmitry","year":"2018","unstructured":"Dmitry Kalashnikov, Alex Irpan, Peter Pastor, Julian Ibarz, Alexander Herzog, Eric Jang, Deirdre Quillen, Ethan Holly, Mrinal Kalakrishnan, Vincent Vanhoucke, et al. 2018. Scalable deep reinforcement learning for vision-based robotic manipulation. In Conference on Robot Learning. PMLR, 651--673."},{"key":"e_1_3_2_2_19_1","volume-title":"Autoregressive policies for continuous control deep reinforcement learning. arXiv preprint arXiv:1903.11524","author":"Korenkevych Dmytro","year":"2019","unstructured":"Dmytro Korenkevych, A Rupam Mahmood, Gautham Vasan, and James Bergstra. 2019. Autoregressive policies for continuous control deep reinforcement learning. arXiv preprint arXiv:1903.11524 (2019)."},{"volume-title":"Auction theory","author":"Krishna Vijay","key":"e_1_3_2_2_20_1","unstructured":"Vijay Krishna. 2009. Auction theory. Academic press."},{"key":"e_1_3_2_2_21_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Kumar Aviral","year":"2019","unstructured":"Aviral Kumar, Justin Fu, Matthew Soh, George Tucker, and Sergey Levine. 2019. Stabilizing off-policy q-learning via bootstrapping error reduction. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_22_1","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar Aviral","year":"2020","unstructured":"Aviral Kumar, Aurick Zhou, George Tucker, and Sergey Levine. 2020. Conservative q-learning for offline reinforcement learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 1179--1191.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/2946645.2946684"},{"key":"e_1_3_2_2_24_1","volume-title":"Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643","author":"Levine Sergey","year":"2020","unstructured":"Sergey Levine, Aviral Kumar, George Tucker, and Justin Fu. 2020. Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)."},{"key":"e_1_3_2_2_25_1","volume-title":"Autobidders with budget and roi constraints: Efficiency, regret, and pacing dynamics. arXiv preprint arXiv:2301.13306","author":"Lucier Brendan","year":"2023","unstructured":"Brendan Lucier, Sarath Pattathil, Aleksandrs Slivkins, and Mengxiao Zhang. 2023. Autobidders with budget and roi constraints: Efficiency, regret, and pacing dynamics. arXiv preprint arXiv:2301.13306 (2023)."},{"key":"e_1_3_2_2_26_1","volume-title":"Conference on robot learning. PMLR, 561--591","author":"Mahmood A Rupam","year":"2018","unstructured":"A Rupam Mahmood, Dmytro Korenkevych, Gautham Vasan, William Ma, and James Bergstra. 2018. Benchmarking reinforcement learning algorithms on real-world robots. In Conference on robot learning. PMLR, 561--591."},{"key":"e_1_3_2_2_27_1","volume-title":"International conference on machine learning. PMLR","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adria Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PMLR, 1928--1937."},{"key":"e_1_3_2_2_28_1","first-page":"2651","article-title":"Sustainable Online Reinforcement Learning for Auto-bidding","volume":"35","author":"Mou Zhiyu","year":"2022","unstructured":"Zhiyu Mou, Yusen Huo, Rongquan Bai, Mingzhou Xie, Chuan Yu, Jian Xu, and Bo Zheng. 2022. Sustainable Online Reinforcement Learning for Auto-bidding. Advances in Neural Information Processing Systems, Vol. 35 (2022), 2651--2663.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2339530.2339655"},{"key":"e_1_3_2_2_30_1","volume-title":"Proceedings of world academy of science, engineering and technology","volume":"27","author":"Sedighizadeh Mostafa","year":"2008","unstructured":"Mostafa Sedighizadeh and Alireza Rezazadeh. 2008. Adaptive PID controller based on reinforcement learning for wind turbine control. In Proceedings of world academy of science, engineering and technology, Vol. 27. Citeseer, 257--262."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.3390\/min11090989"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11276-019-02225-x"},{"key":"e_1_3_2_2_33_1","unstructured":"Richard S Sutton Andrew G Barto et al. 1998. Introduction to reinforcement learning. Vol. 135. MIT press Cambridge."},{"key":"e_1_3_2_2_34_1","volume-title":"Adversarial Constrained Bidding via Minimax Regret Optimization with Causality-Aware Reinforcement Learning. arXiv preprint arXiv:2306.07106","author":"Wang Haozhe","year":"2023","unstructured":"Haozhe Wang, Chao Du, Panyan Fang, Li He, Liang Wang, and Bo Zheng. 2023. Adversarial Constrained Bidding via Minimax Regret Optimization with Causality-Aware Reinforcement Learning. arXiv preprint arXiv:2306.07106 (2023)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684822.2697041"},{"key":"e_1_3_2_2_36_1","volume-title":"Conference on Learning Theory. PMLR, 1562--1583","author":"Weed Jonathan","year":"2016","unstructured":"Jonathan Weed, Vianney Perchet, and Philippe Rigollet. 2016. Online learning in repeated auctions. In Conference on Learning Theory. PMLR, 1562--1583."},{"key":"e_1_3_2_2_37_1","first-page":"1","article-title":"Tianshou: A Highly Modularized Deep Reinforcement Learning Library","volume":"23","author":"Weng Jiayi","year":"2022","unstructured":"Jiayi Weng, Huayu Chen, Dong Yan, Kaichao You, Alexis Duburcq, Minghao Zhang, Yi Su, Hang Su, and Jun Zhu. 2022. Tianshou: A Highly Modularized Deep Reinforcement Learning Library. Journal of Machine Learning Research, Vol. 23, 267 (2022), 1--6. http:\/\/jmlr.org\/papers\/v23\/21--1127.html","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271748"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330681"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2501040.2501980"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2835776.2835843"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783269"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623633"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219918"},{"key":"e_1_3_2_2_45_1","volume-title":"Multi-Objective Actor-Critics for Real-Time Bidding in Display Advertising. In Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, 20--37","author":"Zhou Haolin","year":"2022","unstructured":"Haolin Zhou, Chaoqi Yang, Xiaofeng Gao, Qiong Chen, Gongshen Liu, and Guihai Chen. 2022. Multi-Objective Actor-Critics for Real-Time Bidding in Display Advertising. In Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, 20--37."}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Barcelona Spain","acronym":"KDD '24"},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671555","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671555","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:04:19Z","timestamp":1750291459000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671555"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":45,"alternative-id":["10.1145\/3637528.3671555","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671555","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}