{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T08:48:09Z","timestamp":1772786889174,"version":"3.50.1"},"reference-count":34,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100003696","name":"Electronics and Telecommunications Research Institute","doi-asserted-by":"publisher","award":["19YE1410"],"award-info":[{"award-number":["19YE1410"]}],"id":[{"id":"10.13039\/501100003696","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003696","name":"Electronics and Telecommunications Research Institute","doi-asserted-by":"publisher","award":["22ZS1100"],"award-info":[{"award-number":["22ZS1100"]}],"id":[{"id":"10.13039\/501100003696","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000181","name":"Air Force Office of Scientific Research","doi-asserted-by":"publisher","award":["FA2386-19-1-4020"],"award-info":[{"award-number":["FA2386-19-1-4020"]}],"id":[{"id":"10.13039\/100000181","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/access.2022.3171053","type":"journal-article","created":{"date-parts":[[2022,4,28]],"date-time":"2022-04-28T20:26:38Z","timestamp":1651177598000},"page":"47741-47753","source":"Crossref","is-referenced-by-count":6,"title":["A Novel and Efficient Influence-Seeking Exploration in Deep Multiagent Reinforcement Learning"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0857-5565","authenticated-orcid":false,"given":"Byunghyun","family":"Yoo","sequence":"first","affiliation":[{"name":"Electronics and Telecommunications Research Institute (ETRI), Daejeon, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Devarani Devi","family":"Ningombam","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, GITAM University, Visakhapatnam, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sungwon","family":"Yi","sequence":"additional","affiliation":[{"name":"Electronics and Telecommunications Research Institute (ETRI), Daejeon, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyun Woo","family":"Kim","sequence":"additional","affiliation":[{"name":"Electronics and Telecommunications Research Institute (ETRI), Daejeon, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Euisok","family":"Chung","sequence":"additional","affiliation":[{"name":"Electronics and Telecommunications Research Institute (ETRI), Daejeon, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ran","family":"Han","sequence":"additional","affiliation":[{"name":"Electronics and Telecommunications Research Institute (ETRI), Daejeon, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8216-4812","authenticated-orcid":false,"given":"Hwa Jeon","family":"Song","sequence":"additional","affiliation":[{"name":"Electronics and Telecommunications Research Institute (ETRI), Daejeon, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"StarCraft II: A new challenge for reinforcement learning","author":"Vinyals","year":"2017","journal-title":"arXiv:1708.04782"},{"key":"ref2","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019","journal-title":"arXiv:1912.06680"},{"key":"ref3","first-page":"1","article-title":"Value-decomposition networks for cooperative multi-agent learning based on team reward","volume-title":"Proc. 17th Int. Conf. Auton. Agents Multiagent Syst.","author":"Sunehag"},{"key":"ref4","first-page":"4295","article-title":"QMIX: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Rashid"},{"key":"ref5","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son"},{"key":"ref6","first-page":"1","article-title":"Weighted QMIX: Expanding monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Rashid"},{"key":"ref7","article-title":"QR-MIX: Distributional value function factorisation for cooperative multi-agent reinforcement learning","author":"Hu","journal-title":"arXiv:2009.04197"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3113350"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"ref10","first-page":"6382","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Lowe"},{"key":"ref11","first-page":"2186","article-title":"The StarCraft multi-agent challenge","volume-title":"Proc. 18th Int. Conf. Auton. Agents Multiagent Syst.","author":"Samvelyan"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21236\/ada333248"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref14","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref15","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref16","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. 31st Int. Conf. Mach. Learn. (ICML)","author":"Silver"},{"key":"ref17","first-page":"1","article-title":"Continuous control with deep reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Lillicrap"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014213"},{"key":"ref19","first-page":"2961","article-title":"Actor-attention-critic for multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Iqbal"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6214"},{"key":"ref21","first-page":"1","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref22","first-page":"9945","article-title":"DFAC framework: Factorizing the value function via quantile mixture for multi-agent distributional Q-learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Sun"},{"key":"ref23","first-page":"1","article-title":"RMIX: Learning risk-sensitive policies for cooperative reinforcement learning agents","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Qiu"},{"key":"ref24","first-page":"1","article-title":"RODE: Learning roles to decompose multi-agent tasks","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref25","article-title":"Multiagent soft Q-learning","author":"Wei","year":"2018","journal-title":"arXiv:1804.09817"},{"key":"ref26","first-page":"7611","article-title":"MAVEN: Multiagent variational exploration","volume-title":"Proc. 32nd Int. Conf. Neural Inf. Process. Syst.","author":"Mahajan"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAIRO47923.2019.00035"},{"key":"ref29","first-page":"1","article-title":"Influence based multi-agent exploration","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8_2"},{"key":"ref31","first-page":"1","article-title":"Deep recurrent Q-learning for partially observable MDPS","volume-title":"Proc. AAAI Fall Symp. Sequential Decis. Making Intell. Agents","author":"Hausknecht"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00302"},{"key":"ref33","first-page":"980","article-title":"Deep coordination graphs","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"B\u00f6hmer"},{"key":"ref34","first-page":"1","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","volume-title":"Proc. NIPS Workshop Deep Learn.","author":"Chung"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9668973\/09764683.pdf?arnumber=9764683","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T22:39:40Z","timestamp":1705963180000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9764683\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/access.2022.3171053","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}