{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T19:10:10Z","timestamp":1764270610191,"version":"3.46.0"},"reference-count":32,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Aviation Foundation of National Key Laboratory of Air-based Information Perception and Fusion","award":["ASFC-20240001070002"],"award-info":[{"award-number":["ASFC-20240001070002"]}]},{"name":"Open-End Foundation of National Key Laboratory of Air-based Information Perception and Fusion 6A"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Emerg. Top. Comput. Intell."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tetci.2025.3555250","type":"journal-article","created":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T13:41:09Z","timestamp":1744638069000},"page":"3665-3676","source":"Crossref","is-referenced-by-count":0,"title":["Improving Exploration in Deep Reinforcement Learning for Incomplete Information Competition Environments"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3476-110X","authenticated-orcid":false,"given":"Jie","family":"Lin","sequence":"first","affiliation":[{"name":"Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuhao","family":"Ye","sequence":"additional","affiliation":[{"name":"Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8470-010X","authenticated-orcid":false,"given":"Shaobo","family":"Li","sequence":"additional","affiliation":[{"name":"Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8869-6863","authenticated-orcid":false,"given":"Hanlin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Qingdao University, Qingdao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7033-9315","authenticated-orcid":false,"given":"Peng","family":"Zhao","sequence":"additional","affiliation":[{"name":"Xi&#x0027;an Jiaotong University, Xi&#x0027;an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","first-page":"12333","article-title":"Douzero: Mastering Doudizhu with self-play deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zha","year":"2021"},{"article-title":"SUPHX: Mastering Mahjong with deep reinforcement learning","year":"2020","author":"Li","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-13122-6_15"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1142\/S0129183101002851"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1949.10483310"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.026"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1049\/joe.2019.1200"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1126\/science.aay7774"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/764"},{"key":"ref13","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mnih","year":"2016"},{"article-title":"Episodic curiosity through reachability","year":"2018","author":"Savinov","key":"ref14"},{"key":"ref15","first-page":"4403","article-title":"LIIR: Learning individual intrinsic reward in multi-agent reinforcement learning","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Du","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.117418"},{"article-title":"Ride: Rewarding impact-driven exploration for procedurally-generated environments","year":"2020","author":"Raileanu","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.12440"},{"key":"ref19","first-page":"20118","article-title":"Explicable reward design for reinforcement learning agents","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Devidze","year":"2021"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.trc.2020.102738"},{"key":"ref21","first-page":"6768","article-title":"Inverse reward design","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Hadfield-Menell","year":"2017"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636297"},{"key":"ref23","first-page":"8583","article-title":"Planning to explore via self-supervised world models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sekar","year":"2020"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2020.103630"},{"key":"ref25","first-page":"3836","article-title":"The uncertainty Bellman equation and exploration","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"O\u2019Donoghue","year":"2018"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2005.33"},{"article-title":"The lottery ticket hypothesis: Finding sparse, trainable neural networks","year":"2018","author":"Frankle","key":"ref27"},{"article-title":"Multi-stage episodic control for strategic exploration in text games","year":"2022","author":"Tuyls","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6297"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.3316387"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.120801"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10190993"}],"container-title":["IEEE Transactions on Emerging Topics in Computational Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/7433297\/11177632\/10964687.pdf?arnumber=10964687","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:59:52Z","timestamp":1764269992000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10964687\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":32,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tetci.2025.3555250","relation":{},"ISSN":["2471-285X"],"issn-type":[{"type":"electronic","value":"2471-285X"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}