{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T10:20:37Z","timestamp":1730283637290,"version":"3.28.0"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,4]],"date-time":"2023-12-04T00:00:00Z","timestamp":1701648000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,4]],"date-time":"2023-12-04T00:00:00Z","timestamp":1701648000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000181","name":"Air Force Office of Scientific Research","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000181","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,4]]},"DOI":"10.1109\/mrs60187.2023.10416789","type":"proceedings-article","created":{"date-parts":[[2024,2,5]],"date-time":"2024-02-05T18:32:59Z","timestamp":1707157979000},"page":"92-99","source":"Crossref","is-referenced-by-count":0,"title":["Entropy Maximization in High Dimensional Multiagent State Spaces"],"prefix":"10.1109","author":[{"given":"Ayhan Alp","family":"Aydeniz","sequence":"first","affiliation":[{"name":"Oregon State University,Collaborative Robotics and Intelligent Systems Institute,Corvallis,Oregon,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Enrico","family":"Marchesini","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology,Laboratory for Information &#x0026; Decision Systems,Cambridge,Massachusetts,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robert","family":"Loftin","sequence":"additional","affiliation":[{"name":"University of Sheffield,Department of Computer Science,Sheffield,United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kagan","family":"Tumer","sequence":"additional","affiliation":[{"name":"Oregon State University,Collaborative Robotics and Intelligent Systems Institute,Corvallis,Oregon,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aimag.v35i4.2556"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/JAS.2021.1004269"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636349"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812341"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143949"},{"key":"ref6","article-title":"Unifying count-based exploration and intrinsic motivation","volume":"29","author":"Bellemare","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref7","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","volume":"30","author":"Tang","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref8","article-title":"Ex2: Exploration with exemplar models for deep reinforcement learning","volume":"30","author":"Fu","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref9","article-title":"Never give up: Learning directed exploration strategies","author":"Badia","year":"2020","journal-title":"arXiv preprint arXiv:2002.06038"},{"key":"ref10","first-page":"9443","article-title":"State entropy maximization with random encoders for efficient exploration","volume-title":"International Conference on Machine Learning","author":"Seo"},{"key":"ref11","first-page":"6651","article-title":"Evolutionary reinforcement learning for sample-efficient multiagent coordination","volume-title":"International Conference on Machine Learning","author":"Majumdar"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1142\/S0219525901000188"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3583131.3590428"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1080\/01966324.2003.10737616"},{"key":"ref15","first-page":"17","article-title":"Nonparametric entropy estimation: An overview","author":"Beirlant","year":"1997","journal-title":"International Journal of Mathematical and Statistical Sciences"},{"article-title":"Unifying temporal and structural credit assignment problems","volume-title":"Autonomous Agents and Multi-Agent Systems Conference","author":"Agogino","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"article-title":"Improving deep policy gradients with value function search","volume-title":"The Eleventh International Conference on Learning Representations","author":"Marchesini","key":"ref18"},{"article-title":"Genetic soft updates for policy evolution in deep reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Marchesini","key":"ref19"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3477314.3507182"},{"key":"ref21","article-title":"Evolution-guided policy gradient in reinforcement learning","volume":"31","author":"Khadka","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i7.20737"},{"key":"ref23","first-page":"1919","article-title":"Genetic deep reinforcement learning for mapless navigation","volume-title":"Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems","author":"Marchesini"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3520304.3529035"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-1770-5_3"},{"key":"ref26","article-title":"Vime: Variational information maximizing exploration","volume":"29","author":"Houthooft","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref27","article-title":"Action-conditional video prediction using deep networks in atari games","volume":"28","author":"Oh","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref29","article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"Stadie","year":"2015","journal-title":"arXiv preprint arXiv:1507.00814"},{"key":"ref30","article-title":"Go-explore: a new approach for hard-exploration problems","author":"Ecoffet","year":"2019","journal-title":"arXiv preprint arXiv:1901.10995"},{"key":"ref31","article-title":"Learning to play with intrinsically-motivated, self-aware agents","volume":"31","author":"Haber","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref32","article-title":"Efficient exploration via state marginal matching","author":"Lee","year":"2019","journal-title":"arXiv preprint arXiv:1906.05274"},{"key":"ref33","first-page":"2681","article-title":"Provably efficient maximum entropy exploration","volume-title":"International Conference on Machine Learning","author":"Hazan"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17091"},{"key":"ref35","first-page":"18 459","article-title":"Behavior from the void: Unsupervised active pre-training","volume":"34","author":"Liu","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1948.tb01338.x"},{"article-title":"Product distribution theory for control of multi-agent systems","volume-title":"Proceedings of the Third International Joint Conference on Autonomous Agents and Multiagent Systems, 2004. AAMAS 2004","author":"Lee","key":"ref37"},{"key":"ref38","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning","author":"Haarnoja"},{"key":"ref39","article-title":"Diversity is all you need: Learning skills without a reward function","author":"Eysenbach","year":"2018","journal-title":"arXiv preprint arXiv:1802.06070"},{"key":"ref40","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv"},{"issue":"2","key":"ref41","first-page":"9","article-title":"Sample estimate of the entropy of a random vector","volume":"23","author":"Kozachenko","year":"1987","journal-title":"Problemy Peredachi Informatsii"},{"key":"ref42","first-page":"13 550","article-title":"Heuristic-guided reinforcement learning","volume":"34","author":"Cheng","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref43","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume-title":"International conference on machine learning","author":"Fujimoto"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/9780470544600"},{"key":"ref45","first-page":"1466","article-title":"Safe deep reinforcement learning by verifying task-level properties","volume-title":"Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems","author":"Marchesini"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161312"}],"event":{"name":"2023 International Symposium on Multi-Robot and Multi-Agent Systems (MRS)","start":{"date-parts":[[2023,12,4]]},"location":"Boston, MA, USA","end":{"date-parts":[[2023,12,5]]}},"container-title":["2023 International Symposium on Multi-Robot and Multi-Agent Systems (MRS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10416762\/10416768\/10416789.pdf?arnumber=10416789","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,6]],"date-time":"2024-02-06T22:25:27Z","timestamp":1707258327000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10416789\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,4]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/mrs60187.2023.10416789","relation":{},"subject":[],"published":{"date-parts":[[2023,12,4]]}}}