{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T15:45:57Z","timestamp":1775144757817,"version":"3.50.1"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Major Research Project of the National Natural Science Foundation of China","award":["92267110"],"award-info":[{"award-number":["92267110"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476225"],"award-info":[{"award-number":["62476225"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076202"],"award-info":[{"award-number":["62076202"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key R&#x0026;D Program of China","award":["2023YFF0905604"],"award-info":[{"award-number":["2023YFF0905604"]}]},{"name":"Shaanxi Province Key Research and Development Program of China","award":["2023-YBGY-354"],"award-info":[{"award-number":["2023-YBGY-354"]}]},{"name":"Hebei Province Central Leading Local Science and Technology Development Project","award":["246Z1817G"],"award-info":[{"award-number":["246Z1817G"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cogn. Dev. Syst."],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1109\/tcds.2024.3460368","type":"journal-article","created":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T14:52:00Z","timestamp":1726239120000},"page":"379-389","source":"Crossref","is-referenced-by-count":1,"title":["Neighborhood-Curiosity-Based Exploration in Multiagent Reinforcement Learning"],"prefix":"10.1109","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-2033-0535","authenticated-orcid":false,"given":"Shike","family":"Yang","sequence":"first","affiliation":[{"name":"School of Cybersecurity, Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2218-8550","authenticated-orcid":false,"given":"Ziming","family":"He","sequence":"additional","affiliation":[{"name":"School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0905-0816","authenticated-orcid":false,"given":"Jingchen","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2180-8941","authenticated-orcid":false,"given":"Haobin","family":"Shi","sequence":"additional","affiliation":[{"name":"School of Computer Science, Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9220-4294","authenticated-orcid":false,"given":"Qingbing","family":"Ji","sequence":"additional","affiliation":[{"name":"Key Laboratories for Confidential Communications, Thirtieth Research Institute of CETC Corporation, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9234-4836","authenticated-orcid":false,"given":"Kao-Shing","family":"Hwang","sequence":"additional","affiliation":[{"name":"Department of Electrical Engineering, National Sun Yat-sen University, Taiwan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0101-3973","authenticated-orcid":false,"given":"Xianshan","family":"Li","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Yanshan University, Qinghuangdao, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2022.3170646"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3108237"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.03.057"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2023.3326551"},{"key":"ref5","article-title":"Guided deep reinforcement learning for swarm systems","author":"H\u00fcttenrauch","year":"2017"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2012.2219061"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1287\/moor.27.4.819.297"},{"key":"ref8","article-title":"Value-decomposition networks for cooperative multi-agent learning","author":"Sunehag","year":"2017"},{"key":"ref9","first-page":"4295","article-title":"Qmix: Monotonic value function factorisation for deep multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Rashid","year":"2018"},{"key":"ref10","first-page":"5887","article-title":"QTRAN: Learning to factorize with transformation for cooperative multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Son","year":"2019"},{"key":"ref11","article-title":"QPLEX: Duplex dueling multi-agent q-learning","author":"Wang","year":"2020"},{"key":"ref12","article-title":"MAVEN: Multi-agent variational exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Mahajan","year":"2019"},{"key":"ref13","first-page":"2721","article-title":"Count-based exploration with neural density models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ostrovski","year":"2017"},{"key":"ref14","article-title":"# exploration: A study of count-based exploration for deep reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Tang","year":"2017"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref16","article-title":"Exploration by random network distillation","author":"Burda","year":"2018"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.08.024"},{"key":"ref18","article-title":"VIME: Variational information maximizing exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Houthooft","year":"2016"},{"key":"ref19","article-title":"Influence-based multi-agent exploration","author":"Wang","year":"2019"},{"key":"ref20","first-page":"6826","article-title":"Cooperative exploration for multi-agent deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu","year":"2021"},{"key":"ref21","article-title":"Episodic curiosity through reachability","author":"Savinov","year":"2018"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636297"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8852254"},{"key":"ref24","first-page":"8583","article-title":"Planning to explore via self-supervised world models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sekar","year":"2020"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6212"},{"key":"ref26","article-title":"Graph convolutional reinforcement learning","author":"Jiang","year":"2018"},{"key":"ref27","article-title":"Learning attentional communication for multi-agent cooperation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Jiang","year":"2018"},{"key":"ref28","first-page":"5571","article-title":"Mean field multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yang","year":"2018"},{"key":"ref29","article-title":"The starcraft multi-agent challenge","author":"Samvelyan","year":"2019"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5955"},{"key":"ref32","first-page":"37631","article-title":"Exploration via elliptical episodic bonuses","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Henaff","year":"2022"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2023.3335944"},{"key":"ref34","first-page":"37719","article-title":"Exploit reward shifting in value-based deep-RL: Optimistic curiosity-based exploration and conservative exploitation via linear reward shaping","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Sun","year":"2022"},{"key":"ref35","article-title":"Beyond information gain: An empirical benchmark for low-switching-cost reinforcement learning","author":"Xu","year":"2023","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref36","article-title":"Self-supervised sequential information bottleneck for robust exploration in deep reinforcement learning","author":"You","year":"2022"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.5555\/2986459.2986721"},{"key":"ref38","first-page":"28226","article-title":"Learning to discover skills through guidance","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Kim","year":"2023"},{"key":"ref39","article-title":"ComSD: Balancing behavioral quality and diversity in unsupervised skill discovery","author":"Liu","year":"2023"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2021.3062728"},{"key":"ref41","article-title":"Constrained ensemble exploration for unsupervised skill discovery","author":"Bai","year":"2024"},{"key":"ref42","article-title":"PEAC: Unsupervised pre-training for cross-embodiment reinforcement learning","author":"Ying","year":"2024"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3339215"},{"key":"ref44","first-page":"39183","article-title":"Behavior contrastive learning for unsupervised skill discovery","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Yang","year":"2023"},{"key":"ref45","first-page":"6736","article-title":"APS: Active pretraining with successor features","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu","year":"2021"},{"key":"ref46","article-title":"Diversity is all you need: Learning skills without a reward function","volume-title":"Proc. 7th Int. Conf. Learn. Representations (ICLR)","author":"Eysenbach","year":"2019"},{"key":"ref47","first-page":"18459","article-title":"Behavior from the void: Unsupervised active pre-training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Liu","year":"2021"},{"key":"ref48","first-page":"5572","article-title":"Unsupervised skill discovery with bottleneck option learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim","year":"2021"},{"key":"ref49","first-page":"1317","article-title":"Explore, discover and learn: Unsupervised discovery of state-covering skills","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Campos","year":"2020"},{"key":"ref50","article-title":"Dynamics-aware unsupervised discovery of skills","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Sharma","year":"2019"},{"key":"ref51","first-page":"3040","article-title":"Social influence as intrinsic motivation for multi-agent deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jaques","year":"2019"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/tcds.2023.3323987"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/tcds.2023.3339131"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3281878"},{"key":"ref55","first-page":"3757","article-title":"Episodic multi-agent reinforcement learning with curiosity-driven exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Zheng","year":"2021"},{"key":"ref56","first-page":"5510","article-title":"Towards a standardised performance evaluation protocol for cooperative marl","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Gorsane","year":"2022"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.06.049"}],"container-title":["IEEE Transactions on Cognitive and Developmental Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7274989\/10947641\/10680348.pdf?arnumber=10680348","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T18:33:04Z","timestamp":1765909984000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10680348\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4]]},"references-count":57,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tcds.2024.3460368","relation":{},"ISSN":["2379-8920","2379-8939"],"issn-type":[{"value":"2379-8920","type":"print"},{"value":"2379-8939","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4]]}}}