{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T09:37:04Z","timestamp":1743068224428,"version":"3.37.3"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61876181"],"award-info":[{"award-number":["61876181"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004739","name":"Youth Innovation Promotion Association, and CAS and the Projects of Chinese Academy of Science","doi-asserted-by":"publisher","award":["QYZDB-SSWJSC006"],"award-info":[{"award-number":["QYZDB-SSWJSC006"]}],"id":[{"id":"10.13039\/501100004739","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9891948","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Multi-Agent Uncertainty Sharing for Cooperative Multi-Agent Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Guangkai","family":"Yang","sequence":"first","affiliation":[{"name":"CRISE, Institute of Automation, Chinese Academy of Sciences,Beijing,P.R.China,100190"}]},{"given":"Hao","family":"Chen","sequence":"additional","affiliation":[{"name":"CRISE, Institute of 
Automation, Chinese Academy of Sciences,Beijing,P.R.China,100190"}]},{"given":"Junge","family":"Zhang","sequence":"additional","affiliation":[{"name":"CRISE, Institute of Automation, Chinese Academy of Sciences,Beijing,P.R.China,100190"}]},{"given":"Qiyue","family":"Yin","sequence":"additional","affiliation":[{"name":"CRISE, Institute of Automation, Chinese Academy of Sciences,Beijing,P.R.China,100190"}]},{"given":"Kaiqi","family":"Huang","sequence":"additional","affiliation":[{"name":"CRISE, Institute of Automation, Chinese Academy of Sciences,Beijing,P.R.China,100190"}]}],"member":"263","reference":[{"key":"ref39","article-title":"Riit: Rethinking the importance of implementation tricks in multi-agent reinforcement learning","author":"hu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref38","article-title":"Playing atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref33","article-title":"Neural thompson sampling","author":"zhang","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref32","article-title":"A unifying theory of thompson sampling for continuous risk-averse bandits","author":"chang","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref31","article-title":"A tutorial on thompson sampling","author":"russo","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref30","article-title":"Auto-encoding variational bayes","author":"kingma","year":"2013","journal-title":"ArXiv Preprint"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCBDA51879.2021.9442583"},{"key":"ref36","first-page":"10 757","article-title":"Multi-agent determinantal q-learning","author":"yang","year":"0","journal-title":"Int Conference on Machine Learning"},{"key":"ref35","article-title":"An empirical evaluation of thompson sampling","volume":"24","author":"chapelle","year":"2011","journal-title":"Advances in neural information processing 
systems"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/25.3-4.285"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-63833-7_38"},{"key":"ref40","first-page":"11 853","article-title":"Learning implicit credit assignment for cooperative multi-agent reinforcement learning","volume":"33","author":"zhou","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref11","article-title":"Efficient exploration with double uncertain value networks","author":"moerland","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref12","article-title":"Noisy networks for exploration","author":"fortunato","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref13","article-title":"Parameter space noise for exploration","author":"plappert","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref14","first-page":"1613","article-title":"Weight uncertainty in neural network","author":"blundell","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref15","article-title":"Deep exploration via bootstrapped dqn","volume":"29","author":"osband","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref16","article-title":"Bbq-networks: Efficient exploration in deep reinforcement learning for task-oriented dialogue systems","volume":"32","author":"lipton","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref17","first-page":"449","article-title":"A distributional perspective on reinforcement learning","author":"bellemare","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11791"},{"key":"ref19","first-page":"1096","article-title":"Implicit quantile networks for distributional reinforcement learning","author":"dabney","year":"0","journal-title":"International Conference on Machine 
Learning"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref27","article-title":"The starcraft multi-agent challenge","author":"samvelyan","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"604","DOI":"10.1038\/s41586-020-03051-4","article-title":"Mastering atari, go, chess and shogi by planning with a learned model","volume":"588","author":"schrittwieser","year":"2020","journal-title":"Nature"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2522401"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/168304.168306"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1038\/nature24270","article-title":"Mastering the game of go without human knowledge","volume":"550","author":"silver","year":"2017","journal-title":"Nature"},{"key":"ref8","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref7","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref9","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2015","journal-title":"ArXiv Preprint"},{"journal-title":"Reinforcement Learning An 
Introduction","year":"2018","author":"sutton","key":"ref1"},{"key":"ref20","article-title":"Distributional reinforcement learning with maximum mean discrepancy","author":"nguyen","year":"2020","journal-title":"Association for the Advancement of Artificial Intelligence (AAAI)"},{"key":"ref22","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"sunehag","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref21","first-page":"723","article-title":"A kernel two-sample test","volume":"13","author":"gretton","year":"2012","journal-title":"The Journal of Machine Learning Research"},{"key":"ref24","first-page":"5887","article-title":"Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","author":"son","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref23","first-page":"4295","article-title":"Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning","author":"rashid","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9533636"},{"key":"ref25","article-title":"Maven: Multi-agent variational exploration","volume":"32","author":"mahajan","year":"2019","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2022,7,18]]},"location":"Padua, Italy","end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks 
(IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09891948.pdf?arnumber=9891948","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,15]],"date-time":"2022-11-15T20:45:03Z","timestamp":1668545103000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9891948\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/ijcnn55064.2022.9891948","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}