{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,11]],"date-time":"2026-04-11T13:06:35Z","timestamp":1775912795802,"version":"3.50.1"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,11]]},"DOI":"10.1109\/iros40897.2019.8967849","type":"proceedings-article","created":{"date-parts":[[2020,1,30]],"date-time":"2020-01-30T23:53:51Z","timestamp":1580428431000},"page":"8193-8200","source":"Crossref","is-referenced-by-count":24,"title":["Policy Distillation and Value Matching in Multiagent Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Samir","family":"Wadhwania","sequence":"first","affiliation":[{"name":"Massachusetts Institute of Technology,Laboratory for Information and Decision Systems,77 Massachusetts Ave.,MA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong-Ki","family":"Kim","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology,Laboratory for Information and Decision Systems,77 Massachusetts Ave.,MA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shayegan","family":"Omidshafiei","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology,Laboratory for Information and Decision Systems,77 Massachusetts Ave.,MA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jonathan P.","family":"How","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology,Laboratory for Information and Decision Systems,77 Massachusetts Ave.,MA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11492","article-title":"Emergence of grounded compositional language in multi-agent populations","author":"mordatch","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref32","first-page":"2244","article-title":"Learning multiagent communication with backpropagation","author":"sukhbaatar","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref31","article-title":"Learning hierarchical teaching in cooperative multiagent reinforcement learning","volume":"abs 1903 3216","author":"kim","year":"2019","journal-title":"CoRR"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016128"},{"key":"ref34","article-title":"Multi-agent cooperation and the emergence of (natural) language","author":"lazaridou","year":"2016","journal-title":"arXiv preprint arXiv 1612 07182"},{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11794","article-title":"Counterfactual multi-agent policy gradients","author":"foerster","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref11","article-title":"Policy Distillation","author":"rusu","year":"2015","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-005-2631-2"},{"key":"ref13","article-title":"Symmetry in markov decision processes and its implications for single agent and multi agent learning","author":"zinkevich","year":"2001","journal-title":"Proc 18th Int Conf Machine Learning"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref15","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref16","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1150402.1150464"},{"key":"ref18","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-247-2.50017-6"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of Go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref27","first-page":"792","article-title":"Principled methods for advising reinforcement learning agents","author":"wiewiora","year":"2003","journal-title":"Proceedings of the 20th International Conference on Machine Learning"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"ref29","first-page":"1100","article-title":"Simultaneously learning and advising in multiagent reinforcement learning","author":"da silva","year":"2017","journal-title":"Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems International Foundation for Autonomous Agents and Multiagent Systems"},{"key":"ref5","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref8","first-page":"183","article-title":"Multi-agent reinforcement learning: An overview","author":"bu?oniu","year":"2010","journal-title":"Innovations in Multi-Agent Systems and Applications - 1"},{"key":"ref7","first-page":"2681","article-title":"Deep decentralized multi-task multi-agent reinforcement learning under partial observability","author":"omidshafiei","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2009.0070"},{"key":"ref9","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","author":"lowe","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3005745.3005750"},{"key":"ref20","first-page":"28","article-title":"Transfer learning in multi-agent systems through parallel transfer","author":"taylor","year":"2013","journal-title":"Workshop on Theoretically Grounded Transfer Learning at the 30th International Conf on Machine Learning (Poster)"},{"key":"ref22","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"2011","journal-title":"Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2008.10.024"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref23","first-page":"2","article-title":"Algorithms for inverse reinforcement learning","volume":"1","author":"ng","year":"2000","journal-title":"et al"},{"key":"ref26","first-page":"249","article-title":"Transfer learning in multi-agent reinforcement learning domains","author":"boutsioukis","year":"2011","journal-title":"Proc of European Workshop on Reinforcement Learning"},{"key":"ref25","first-page":"1995","article-title":"Coordinated multi-agent imitation learning","author":"le","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"}],"event":{"name":"2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Macau, China","start":{"date-parts":[[2019,11,3]]},"end":{"date-parts":[[2019,11,8]]}},"container-title":["2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8957008\/8967518\/08967849.pdf?arnumber=8967849","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T18:44:34Z","timestamp":1753901074000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8967849\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/iros40897.2019.8967849","relation":{},"subject":[],"published":{"date-parts":[[2019,11]]}}}