{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:18:53Z","timestamp":1775229533314,"version":"3.50.1"},"reference-count":39,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2016YFB1000904"],"award-info":[{"award-number":["2016YFB1000904"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2022,10]]},"DOI":"10.1109\/tnnls.2021.3070484","type":"journal-article","created":{"date-parts":[[2021,5,17]],"date-time":"2021-05-17T20:51:50Z","timestamp":1621284710000},"page":"5361-5373","source":"Crossref","is-referenced-by-count":6,"title":["Hierarchical Multiagent Reinforcement Learning for Allocating Guaranteed Display Ads"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7305-1496","authenticated-orcid":false,"given":"Lu","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"given":"Lei","family":"Han","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Tencent, Inc., Shenzhen, China"}]},{"given":"Xinru","family":"Chen","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Tencent, Inc., Shenzhen, China"}]},{"given":"Chengchang","family":"Li","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Tencent, Inc., Shenzhen, China"}]},{"given":"Junzhou","family":"Huang","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Tencent, Inc., Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0127-2425","authenticated-orcid":false,"given":"Weinan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6763-8146","authenticated-orcid":false,"given":"Wei","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6911-348X","authenticated-orcid":false,"given":"Xiaofeng","family":"He","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, East China Normal University, Shanghai, China"}]},{"given":"Dijun","family":"Luo","sequence":"additional","affiliation":[{"name":"Tencent AI Lab, Tencent, Inc., Shenzhen, China"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"eMarketer","year":"2019"},{"key":"ref2","volume-title":"The Arrivals of Real-Time Bidding","year":"2011"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2684822.2697041"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623633"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2229012.2229038"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2339530.2339718"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1287\/mksc.2018.1104"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2016.1567"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/FOCS.2009.72"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611973099.134"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/SFCS.2005.12"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1287\/opre.2014.1289"},{"key":"ref13","volume":"461","author":"\u00c5str\u00f6m","year":"2006","journal-title":"Advanced PID Control"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1017\/S0269888912000057"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-71682-4_5"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11492"},{"key":"ref17","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lowe"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v28i1.9030"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/MIC.2015.137"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2019.106071"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1807342.1807360"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2015.2403394"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2609500"},{"key":"ref25","article-title":"Stabilising experience replay for deep multi-agent reinforcement learning","volume-title":"arXiv:1702.08887","author":"Foerster","year":"2017"},{"key":"ref26","first-page":"1043","article-title":"Reinforcement learning with hierarchies of machines","volume-title":"Proc. NIPS","author":"Parr"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1613\/jair.639"},{"key":"ref29","article-title":"Temporal abstraction in reinforcement learning","author":"Precup","year":"2000"},{"key":"ref30","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume-title":"Proc. ICML","author":"Vezhnevets"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3103642"},{"key":"ref32","first-page":"1","article-title":"Multi-agent shared hierarchy reinforcement learning","volume-title":"Proc. ICML","author":"Mehta"},{"key":"ref33","first-page":"1114","article-title":"Learning to communicate and act using hierarchical reinforcement learning","volume-title":"Proc. 3rd Int. Joint Conf. Auton. Agents Multiagent Syst.","volume":"3","author":"Ghavamzadeh"},{"key":"ref34","first-page":"1","article-title":"Hierarchical deep multiagent reinforcement learning","volume-title":"Proc. AAAI","author":"Tang"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/375735.376302"},{"key":"ref36","article-title":"Feudal multi-agent hierarchies for cooperative reinforcement learning","volume-title":"arXiv:1901.08492","author":"Ahilan","year":"2019"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.32917\/hmj\/1206139508"},{"key":"ref38","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. ICML","author":"Silver"},{"key":"ref39","first-page":"834","article-title":"Improving stochastic policy gradients in continuous control with deep reinforcement learning using the beta distribution","volume-title":"Proc. ICML","author":"Chou"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/9911935\/09432789.pdf?arnumber=9432789","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,10]],"date-time":"2024-01-10T00:20:34Z","timestamp":1704846034000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9432789\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10]]},"references-count":39,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2021.3070484","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10]]}}}