{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,13]],"date-time":"2026-07-13T14:50:39Z","timestamp":1783954239006,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,7,19]],"date-time":"2018-07-19T00:00:00Z","timestamp":1531958400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"The Major Basic Research Project of Shaanxi Province","award":["2017ZDJC-31"],"award-info":[{"award-number":["2017ZDJC-31"]}]},{"name":"The National Natural Science Foundation of China","award":["61522206 61373118 61672409"],"award-info":[{"award-number":["61522206 61373118 61672409"]}]},{"name":"The Science and Technology Plan Program in Shaanxi Province of China","award":["2017KJXX-80"],"award-info":[{"award-number":["2017KJXX-80"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,7,19]]},"DOI":"10.1145\/3219819.3219918","type":"proceedings-article","created":{"date-parts":[[2018,7,19]],"date-time":"2018-07-19T13:05:12Z","timestamp":1532005512000},"page":"1021-1030","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":63,"title":["Deep Reinforcement Learning for Sponsored Search Real-time Bidding"],"prefix":"10.1145","author":[{"given":"Jun","family":"Zhao","sequence":"first","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guang","family":"Qiu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ziyu","family":"Guan","sequence":"additional","affiliation":[{"name":"Xidian University, Xian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wei","family":"Zhao","sequence":"additional","affiliation":[{"name":"Xidian University, Xian, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaofei","family":"He","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2018,7,19]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Budget optimization for sponsored search: Censored learning in MDPs. arXiv preprint arXiv:1210.4847","author":"Amin Kareem","year":"2012"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1242572.1242644"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1064009.1064014"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1935826.1935901"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018702"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2020408.2020604"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1526709.1526741"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1250910.1250917"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1367497.1367506"},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Machine Learning. 2829--2838","author":"Gu Shixiang","year":"2016"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-011-5235-x"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1080\/1019678042000245119"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2501040.2501979"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2339530.2339651"},{"key":"e_1_3_2_1_16_1","unstructured":"Sergey Levine and Pieter Abbeel. 2014. Learning neural network policies with guided policy search under unknown dynamics. In Advances in Neural Information Processing Systems. 1071--1079.   Sergey Levine and Pieter Abbeel. 2014. Learning neural network policies with guided policy search under unknown dynamics. In Advances in Neural Information Processing Systems. 1071--1079."},{"key":"e_1_3_2_1_17_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski etal 2015. Human-level control through deep reinforcement learning. Nature 518 7540 (2015) 529.  Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski et al. 2015. Human-level control through deep reinforcement learning. Nature 518 7540 (2015) 529.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/1781894.1781913"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2339530.2339655"},{"key":"e_1_3_2_1_21_1","volume-title":"Artificial Intelligence: foundations of computational agents","author":"Poole David L"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","volume-title":"Multi-agent machine learning: A reinforcement approach","author":"Schwartz Howard M","DOI":"10.1002\/9781118884614"},{"key":"e_1_3_2_1_23_1","volume-title":"Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al.","author":"Silver David","year":"2016"},{"key":"e_1_3_2_1_24_1","volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard S"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0172395"},{"key":"e_1_3_2_1_26_1","volume-title":"Divide the gradient by a running average of its recent magnitude. COURSERA: Neural networks for machine learning 4, 2","author":"Tieleman Tijmen","year":"2012"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684822.2697041"},{"key":"e_1_3_2_1_28_1","volume-title":"LADDER: A Human-Level Bidding Agent for Large-Scale Real-Time Online Auctions. arXiv preprint arXiv:1708.05565","author":"Wang Yu","year":"2017"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783276"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2501040.2501980"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623633"}],"event":{"name":"KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","location":"London United Kingdom","acronym":"KDD '18","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3219819.3219918","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3219819.3219918","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:07:21Z","timestamp":1750212441000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3219819.3219918"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,7,19]]},"references-count":31,"alternative-id":["10.1145\/3219819.3219918","10.1145\/3219819"],"URL":"https:\/\/doi.org\/10.1145\/3219819.3219918","relation":{},"subject":[],"published":{"date-parts":[[2018,7,19]]},"assertion":[{"value":"2018-07-19","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}