{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T22:31:56Z","timestamp":1777501916710,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,3,22]],"date-time":"2021-03-22T00:00:00Z","timestamp":1616371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"U.S. Air Force Research Laboratory (AFRL)","award":["FA8750-18-S-7007"],"award-info":[{"award-number":["FA8750-18-S-7007"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,3,22]]},"DOI":"10.1145\/3412841.3441953","type":"proceedings-article","created":{"date-parts":[[2021,4,23]],"date-time":"2021-04-23T05:10:24Z","timestamp":1619154624000},"page":"777-784","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["Multi-agent reinforcement learning with directed exploration and selective memory reuse"],"prefix":"10.1145","author":[{"given":"Shuo","family":"Jiang","sequence":"first","affiliation":[{"name":"Northeastern University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christopher","family":"Amato","sequence":"additional","affiliation":[{"name":"Northeastern University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Marc Bellemare Sriram Srinivasan Georg Ostrovski Tom Schaul David Saxton and Remi Munos. 2016. Unifying Count-based Exploration and Intrinsic Motivation. In Advances in Neural Information Processing Systems. 1471--1479."},{"key":"e_1_3_2_1_2_1","volume-title":"Using State Predictions for Value Regularization in Curiosity Driven Deep Reinforcement Learning. In 2018 IEEE 30th International Conference on Tools with Artificial Intelligence (ICTAI). IEEE, 25--29","author":"Brunner Gino","year":"2018","unstructured":"Gino Brunner, Manuel Fritsche, Oliver Richter, and Roger Wattenhofer. 2018. Using State Predictions for Value Regularization in Curiosity Driven Deep Reinforcement Learning. In 2018 IEEE 30th International Conference on Tools with Artificial Intelligence (ICTAI). IEEE, 25--29."},{"key":"e_1_3_2_1_3_1","volume-title":"Linda Kaufman, Alex J Smola, and Vladimir Vapnik.","author":"Drucker Harris","year":"1997","unstructured":"Harris Drucker, Christopher JC Burges, Linda Kaufman, Alex J Smola, and Vladimir Vapnik. 1997. Support Vector Regression Machines. In Advances in Neural Information Processing Systems. 155--161."},{"key":"e_1_3_2_1_4_1","volume-title":"Counterfactual Multi-Agent Policy Gradients. In Thirty-Second AAAI Conference on Artificial Intelligence.","author":"Foerster Jakob N","year":"2018","unstructured":"Jakob N Foerster, Gregory Farquhar, Triantafyllos Afouras, Nantas Nardelli, and Shimon Whiteson. 2018. Counterfactual Multi-Agent Policy Gradients. In Thirty-Second AAAI Conference on Artificial Intelligence."},{"key":"e_1_3_2_1_5_1","unstructured":"Justin Fu John Co-Reyes and Sergey Levine. 2017. Ex2: Exploration with Exemplar Models for Deep Reinforcement Learning. In Advances in Neural Information Processing Systems. 2577--2587."},{"key":"e_1_3_2_1_6_1","volume-title":"Generative Adversarial Self-imitation Learning. arXiv preprint arXiv:1812.00950","author":"Guo Yijie","year":"2018","unstructured":"Yijie Guo, Junhyuk Oh, Satinder Singh, and Honglak Lee. 2018. Generative Adversarial Self-imitation Learning. arXiv preprint arXiv:1812.00950 (2018)."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems. International Foundation for Autonomous Agents and Multiagent Systems, 1315--1323","author":"Hao Xiaotian","year":"2019","unstructured":"Xiaotian Hao, Weixun Wang, Jianye Hao, and Yaodong Yang. 2019. Independent Generative Adversarial Self-Imitation Learning in Cooperative Multiagent Systems. In Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems. International Foundation for Autonomous Agents and Multiagent Systems, 1315--1323."},{"key":"e_1_3_2_1_8_1","unstructured":"Jonathan Ho and Stefano Ermon. 2016. Generative Adversarial Imitation Learning. In Advances in Neural Information Processing Systems. 4565--4573."},{"key":"e_1_3_2_1_9_1","volume-title":"Distributed Prioritized Experience Replay. 6th International Conference on Learning Representations (ICLR 2018)","author":"Horgan Dan","year":"2018","unstructured":"Dan Horgan, John Quan, David Budden, Gabriel Barth-Maron, Matteo Hessel, Hado Van Hasselt, and David Silver. 2018. Distributed Prioritized Experience Replay. 6th International Conference on Learning Representations (ICLR 2018) (2018)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/SMC.2017.8122622"},{"key":"e_1_3_2_1_11_1","volume-title":"Filip De Turck, and Pieter Abbeel","author":"Houthooft Rein","year":"2016","unstructured":"Rein Houthooft, Xi Chen, Yan Duan, John Schulman, Filip De Turck, and Pieter Abbeel. 2016. Vime: Variational Information Maximizing Exploration. In Advances in Neural Information Processing Systems. 1109--1117."},{"key":"e_1_3_2_1_12_1","volume-title":"Coordinated Exploration via Intrinsic Rewards for Multi-Agent Reinforcement Learning. arXiv preprint arXiv:1905.12127","author":"Iqbal Shariq","year":"2019","unstructured":"Shariq Iqbal and Fei Sha. 2019. Coordinated Exploration via Intrinsic Rewards for Multi-Agent Reinforcement Learning. arXiv preprint arXiv:1905.12127 (2019)."},{"key":"e_1_3_2_1_13_1","volume-title":"EMI: Exploration with Mutual Information. In International Conference on Machine Learning. 3360--3369","author":"Kim Hyoungseok","year":"2019","unstructured":"Hyoungseok Kim, Jaekyeom Kim, Yeonwoo Jeong, Sergey Levine, and Hyun Oh Song. 2019. EMI: Exploration with Mutual Information. In International Conference on Machine Learning. 3360--3369."},{"key":"e_1_3_2_1_14_1","volume-title":"Continuous Control with Deep Reinforcement Learning. 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2--4, 2016, Conference Track Proceedings","author":"Lillicrap Timothy P","year":"2016","unstructured":"Timothy P Lillicrap, Jonathan J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2016. Continuous Control with Deep Reinforcement Learning. 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2--4, 2016, Conference Track Proceedings (2016)."},{"key":"e_1_3_2_1_15_1","volume-title":"Kernel Adaptive Filtering: A Comprehensive Introduction","author":"Liu Weifeng","unstructured":"Weifeng Liu, Jose C Principe, and Simon Haykin. 2011. Kernel Adaptive Filtering: A Comprehensive Introduction. Vol. 57. John Wiley & Sons."},{"key":"e_1_3_2_1_16_1","volume-title":"OpenAI Pieter Abbeel, and Igor Mordatch","author":"Lowe Ryan","year":"2017","unstructured":"Ryan Lowe, Yi Wu, Aviv Tamar, Jean Harb, OpenAI Pieter Abbeel, and Igor Mordatch. 2017. Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments. In Advances in Neural Information Processing Systems. 6379--6390."},{"key":"e_1_3_2_1_17_1","volume-title":"Playing Atari with Deep Reinforcement Learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. 2013. Playing Atari with Deep Reinforcement Learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski et al. 2015. Human-Level Control through Deep Reinforcement Learning. Nature 518 7540 (2015) 529.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics. 557--564","author":"Nguyen-Tuong Duy","year":"2010","unstructured":"Duy Nguyen-Tuong and Jan Peters. 2010. Incremental Sparsification for Real-Time Online Model Learning. In Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics. 557--564."},{"key":"e_1_3_2_1_20_1","volume-title":"Self-Imitation Learning. Proceedings of the 35 th International Conference on Machine Learning","author":"Oh Junhyuk","year":"2018","unstructured":"Junhyuk Oh, Yijie Guo, Satinder Singh, and Honglak Lee. 2018. Self-Imitation Learning. Proceedings of the 35 th International Conference on Machine Learning, Stockholm, Sweden, PMLR 80, 2018. (2018)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Frans A Oliehoek Christopher Amato et al. 2016. A Concise Introduction to Decentralized POMDPs. Vol. 1. Springer.","DOI":"10.1007\/978-3-319-28929-8_1"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305890.3305958"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the 35-th International Conference on Machine Learning","author":"Rashid Tabish","year":"2018","unstructured":"Tabish Rashid, Mikayel Samvelyan, Christian Schroeder De Witt, Gregory Farquhar, Jakob Foerster, and Shimon Whiteson. 2018. QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning. Proceedings of the 35-th International Conference on Machine Learning, Stockholm, Sweden, PMLR 80, 2018 (2018)."},{"key":"e_1_3_2_1_25_1","volume-title":"Summer School on Machine Learning","author":"Rasmussen Carl Edward","unstructured":"Carl Edward Rasmussen. 2003. Gaussian Processes in Machine Learning. In Summer School on Machine Learning. Springer, 63--71."},{"key":"e_1_3_2_1_26_1","volume-title":"Prioritized Experience Replay. arXiv preprint arXiv:1511.05952","author":"Schaul Tom","year":"2015","unstructured":"Tom Schaul, John Quan, Ioannis Antonoglou, and David Silver. 2015. Prioritized Experience Replay. arXiv preprint arXiv:1511.05952 (2015)."},{"key":"e_1_3_2_1_27_1","volume-title":"4th International Conference on Learning Representations, ICLR","author":"Stadie Bradly C","year":"2016","unstructured":"Bradly C Stadie, Sergey Levine, and Pieter Abbeel. 2015. Incentivizing Exploration in Reinforcement Learning with Deep Predictive Models. 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2--4, 2016, Conference Track Proceedings. 2016 (2015)."},{"key":"e_1_3_2_1_28_1","volume-title":"Vinicius Zambaldi, Max Jaderberg, Marc Lanctot, Nicolas Sonnerat, Joel Z Leibo, Karl Tuyls, et al.","author":"Sunehag Peter","year":"2018","unstructured":"Peter Sunehag, Guy Lever, Audrunas Gruslys, Wojciech Marian Czarnecki, Vinicius Zambaldi, Max Jaderberg, Marc Lanctot, Nicolas Sonnerat, Joel Z Leibo, Karl Tuyls, et al. 2018. Value-Decomposition Networks For Cooperative Multi-Agent Learning Based On Team Reward. Sunehag, Peter, et al. \"Value-Decomposition Networks For Cooperative Multi-Agent Learning Based On Team Reward.\" AAMAS. 2018. (2018)."},{"key":"e_1_3_2_1_29_1","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton Richard S","year":"2018","unstructured":"Richard S Sutton and Andrew G Barto. 2018. Reinforcement Learning: An Introduction. MIT press."},{"key":"e_1_3_2_1_30_1","volume-title":"Yan Duan, John Schulman, Filip DeTurck, and Pieter Abbeel.","author":"Tang Haoran","year":"2017","unstructured":"Haoran Tang, Rein Houthooft, Davis Foote, Adam Stooke, OpenAI Xi Chen, Yan Duan, John Schulman, Filip DeTurck, and Pieter Abbeel. 2017. # Exploration: A Study of Count-Based Exploration for Deep Reinforcement Learning. In Advances in Neural Information Processing Systems. 2753--2762."},{"key":"e_1_3_2_1_31_1","volume-title":"Influence-Based Multi-Agent Exploration. 8th International Conference on Learning Representations (ICLR 2020)","author":"Wang Tonghan","year":"2019","unstructured":"Tonghan Wang, Jianhao Wang, Yi Wu, and Chongjie Zhang. 2019. Influence-Based Multi-Agent Exploration. 8th International Conference on Learning Representations (ICLR 2020) (2019)."},{"key":"e_1_3_2_1_32_1","volume-title":"Sample Efficient Actor-Critic with Experience Replay. 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24--26, 2017, Conference Track Proceedings","author":"Wang Ziyu","year":"2016","unstructured":"Ziyu Wang, Victor Bapst, Nicolas Heess, Volodymyr Mnih, Remi Munos, Koray Kavukcuoglu, and Nando de Freitas. 2016. Sample Efficient Actor-Critic with Experience Replay. 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24--26, 2017, Conference Track Proceedings (2016)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2009.05.011"},{"key":"e_1_3_2_1_34_1","volume-title":"Multiagent Soft Q-learning. In 2018 AAAI Spring Symposium Series.","author":"Wei Ermo","year":"2018","unstructured":"Ermo Wei, Drew Wicke, David Freelan, and Sean Luke. 2018. Multiagent Soft Q-learning. In 2018 AAAI Spring Symposium Series."},{"key":"e_1_3_2_1_35_1","volume-title":"Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning. Machine learning 8, 3--4","author":"Williams Ronald J","year":"1992","unstructured":"Ronald J Williams. 1992. Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning. Machine learning 8, 3--4 (1992), 229--256."},{"key":"e_1_3_2_1_36_1","volume-title":"Macro-Action-Based Deep Multi-Agent Reinforcement Learning. In 3rd Conference on Robot Learning (CoRL","author":"Xiao Yuchen","year":"2019","unstructured":"Yuchen Xiao, Joshua Hoffman, and Christopher Amato. 2019. Macro-Action-Based Deep Multi-Agent Reinforcement Learning. In 3rd Conference on Robot Learning (CoRL 2019), Osaka, Japan."}],"event":{"name":"SAC '21: The 36th ACM\/SIGAPP Symposium on Applied Computing","location":"Virtual Event Republic of Korea","acronym":"SAC '21","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the 36th Annual ACM Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3412841.3441953","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3412841.3441953","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:02:24Z","timestamp":1750197744000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3412841.3441953"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,22]]},"references-count":36,"alternative-id":["10.1145\/3412841.3441953","10.1145\/3412841"],"URL":"https:\/\/doi.org\/10.1145\/3412841.3441953","relation":{},"subject":[],"published":{"date-parts":[[2021,3,22]]},"assertion":[{"value":"2021-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}