{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T16:12:02Z","timestamp":1781799122556,"version":"3.54.5"},"reference-count":52,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004750","name":"Aeronautical Science Foundation of China","doi-asserted-by":"publisher","award":["20200058069001"],"award-info":[{"award-number":["20200058069001"]}],"id":[{"id":"10.13039\/501100004750","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Games"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1109\/tg.2024.3375515","type":"journal-article","created":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T18:20:53Z","timestamp":1710440453000},"page":"62-75","source":"Crossref","is-referenced-by-count":7,"title":["GAILPG: Multiagent Policy Gradient With Generative Adversarial Imitation Learning"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9235-9429","authenticated-orcid":false,"given":"Wei","family":"Li","sequence":"first","affiliation":[{"name":"School of Instrument Science and Engineering, Southeast University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1114-5198","authenticated-orcid":false,"given":"Shiyi","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Instrument Science and Engineering, Southeast University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7287-5936","authenticated-orcid":false,"given":"Ziming","family":"Qiu","sequence":"additional","affiliation":[{"name":"School of Instrument Science and Engineering, Southeast University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1982-6780","authenticated-orcid":false,"given":"Aiguo","family":"Song","sequence":"additional","affiliation":[{"name":"School of Instrument Science and Engineering, Southeast University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/9781118884614"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/OJCOMS.2021.3081996"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-8155-7_464"},{"key":"ref4","article-title":"Safe, multi-agent, reinforcement learning for autonomous driving","author":"Shalev-Shwartz","year":"2016"},{"key":"ref5","article-title":"SMARTS: Scalable multi-agent reinforcement learning training school for autonomous driving","author":"Zhou","year":"2020"},{"key":"ref6","article-title":"An overview of multi-agent reinforcement learning from game theoretical perspective","author":"Yang","year":"2020"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref8","first-page":"15032","article-title":"PettingZoo: Gym for multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Terry","year":"2021"},{"key":"ref9","article-title":"Fever basketball: A complex, flexible, and asynchronized sports game environment for multi-agent reinforcement learning","author":"Jia","year":"2020"},{"key":"ref10","first-page":"2085","article-title":"Value-decomposition networks for cooperative multi-agent learning","volume-title":"Proc. Int. Joint Conf. Auton. Agents Multiagent Syst.","author":"Sunehag","year":"2018"},{"issue":"1","key":"ref11","first-page":"7234","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref12","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son","year":"2019"},{"key":"ref13","first-page":"1","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang","year":"2020"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref15","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Lowe","year":"2017"},{"key":"ref16","article-title":"Off-policy multi-agent decomposed policy gradients","author":"Wang","year":"2020"},{"key":"ref17","article-title":"Benchmarking multi-agent deep reinforcement learning algorithms in cooperative tasks","author":"Papoudakis","year":"2020"},{"key":"ref18","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Yu","year":"2022"},{"key":"ref19","article-title":"Rethinking the implementation tricks and monotonicity constraint in cooperative multi-agent reinforcement learning","author":"Hu","year":"2021"},{"key":"ref20","first-page":"5048","article-title":"Hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Andrychowicz","year":"2017"},{"key":"ref21","first-page":"1","article-title":"Prioritized experience replay","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Schaul","year":"2016"},{"key":"ref22","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Lillicrap","year":"2016"},{"key":"ref23","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO49542.2019.8961529"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3356464.3357706"},{"key":"ref27","article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"Stadie","year":"2015"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-020-01849-3"},{"key":"ref29","first-page":"1","article-title":"RIDE: Rewarding impact-driven exploration for procedurally-generated environments","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Raileanu","year":"2020"},{"key":"ref30","article-title":"Exploration by random network distillation","author":"Burda","year":"2018"},{"key":"ref31","first-page":"1","article-title":"Never give up: Learning directed exploration strategies","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Badia","year":"2020"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TG.2022.3226910"},{"key":"ref33","first-page":"3757","article-title":"Episodic multi-agent reinforcement learning with curiosity-driven exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Zheng","year":"2021"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"ref35","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ng","year":"2000"},{"key":"ref36","first-page":"4565","article-title":"Generative adversarial imitation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Ho","year":"2016"},{"key":"ref37","article-title":"Discriminator-actor-critic: Addressing sample inefficiency and reward bias in adversarial imitation learning","author":"Kostrikov","year":"2018"},{"key":"ref38","article-title":"Generative adversarial self-imitation learning","author":"Guo","year":"2018"},{"key":"ref39","first-page":"7472","article-title":"Multi-agent generative adversarial imitation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Song","year":"2018"},{"key":"ref40","first-page":"1315","article-title":"Independent generative adversarial self-imitation learning in cooperative multiagent systems","volume-title":"Proc. Int. Joint Conf. Auton. Agents Multiagent Syst.","author":"Hao","year":"2019"},{"key":"ref41","article-title":"Self-imitation learning from demonstrations","author":"Pshikhachev","year":"2022"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref44","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"12","author":"Sutton","year":"1999"},{"key":"ref45","first-page":"759","article-title":"Eligibility traces for off-policy policy evaluation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Precup","year":"2000"},{"key":"ref46","first-page":"1054","article-title":"Safe and efficient off-policy reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Munos","year":"2016"},{"key":"ref47","first-page":"1","article-title":"Learning robust rewards with adversarial inverse reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Fu","year":"2018"},{"key":"ref48","first-page":"2186","article-title":"The StarCraft multi-agent challenge","volume-title":"Proc. Int. Joint Conf. Auton. Agents Multiagent Syst.","author":"Samvelyan","year":"2019"},{"key":"ref49","first-page":"29427","article-title":"Multiagent Q-learning with sub-team coordination","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Huang","year":"2022"},{"key":"ref50","article-title":"Parametrized deep Q-networks learning: Reinforcement learning with discrete-continuous hybrid action space","author":"Xiong","year":"2018"},{"key":"ref51","first-page":"1","article-title":"Gray-box Gaussian processes for automated reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Shala","year":"2023"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3198981"}],"container-title":["IEEE Transactions on Games"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7782673\/10931171\/10465634.pdf?arnumber=10465634","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,19]],"date-time":"2025-03-19T20:04:28Z","timestamp":1742414668000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10465634\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":52,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tg.2024.3375515","relation":{},"ISSN":["2475-1502","2475-1510"],"issn-type":[{"value":"2475-1502","type":"print"},{"value":"2475-1510","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3]]}}}