{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T16:13:56Z","timestamp":1773850436894,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,26]],"date-time":"2021-10-26T00:00:00Z","timestamp":1635206400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2017YFB1001901"],"award-info":[{"award-number":["2017YFB1001901"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Commission of Tianjin Binhai New Area","award":["BHXQKJXM-PT-RGZNJMZX-2019001"],"award-info":[{"award-number":["BHXQKJXM-PT-RGZNJMZX-2019001"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,26]]},"DOI":"10.1145\/3459637.3482326","type":"proceedings-article","created":{"date-parts":[[2021,10,30]],"date-time":"2021-10-30T18:33:14Z","timestamp":1635618794000},"page":"2321-2330","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["CIExplore"],"prefix":"10.1145","author":[{"given":"Huanhuan","family":"Yang","sequence":"first","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dianxi","family":"Shi","sequence":"additional","affiliation":[{"name":"Defense Innovation Institute &amp; Tianjin Artificial Intelligence Innovation Center, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenran","family":"Zhao","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guojun","family":"Xie","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaowu","family":"Yang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,10,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157096.3157262"},{"key":"e_1_3_2_1_2_1","volume-title":"Mulex: Disentangling exploitation from exploration in deep rl. arXiv preprint arXiv:1907.00868","author":"Beyer Lucas","year":"2019"},{"key":"e_1_3_2_1_3_1","volume-title":"The 2nd Exploration in Reinforcement Learning Workshop at the International Conference on Machine Learning","author":"B\u00f6hmer Wendelin","year":"2019"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/993483"},{"key":"e_1_3_2_1_5_1","volume-title":"The 7th International Conference on Learning Representations","author":"Burda Yuri","year":"2019"},{"key":"e_1_3_2_1_6_1","volume-title":"The 7th International Conference on Learning Representations","author":"Burda Yuri","year":"2018"},{"key":"e_1_3_2_1_7_1","volume-title":"Minimalistic gridworld environment for openai gym. GitHub repository","author":"Chevalier-Boisvert Maxime","year":"2018"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.psych.55.090902.142015"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"e_1_3_2_1_11_1","first-page":"3","article-title":"An introduction to deep reinforcement learning","volume":"11","author":"Lavet Vincent Francc","year":"2018","journal-title":"Foundations and Trends in Machine Learning"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305521"},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Haarnoja Tuomas","year":"2018"},{"key":"e_1_3_2_1_14_1","volume-title":"A survey of learning in multiagent environments: Dealing with non-stationarity. arXiv preprint arXiv:1707.09183","author":"Hernandez-Leal Pablo","year":"2017"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10458-019-09421-1"},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Machine Learning. PMLR, 2961--2970","author":"Iqbal Shariq","year":"2019"},{"key":"e_1_3_2_1_17_1","volume-title":"Coordinated Exploration via Intrinsic Rewards for Multi-Agent Reinforcement Learning. arXiv preprint arXiv:1905.12127","author":"Iqbal Shariq","year":"2019"},{"key":"e_1_3_2_1_18_1","volume-title":"International Conference on Machine Learning. PMLR, 3040--3049","author":"Jaques Natasha","year":"2019"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.5555\/3009657.3009799"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eng.2018.02.004"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295385"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5955"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/3172077.3172232"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski etal 2015. Human-level control through deep reinforcement learning. nature Vol. 518 7540 (2015) 529--533.  Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski et al. 2015. Human-level control through deep reinforcement learning. nature Vol. 518 7540 (2015) 529--533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_1_25_1","volume-title":"Science","volume":"356","author":"Matej Moravvc","year":"2017"},{"key":"e_1_3_2_1_26_1","volume-title":"Counterfactuals and causal inference","author":"Morgan Stephen L"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.5555\/2967142"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305890.3305962"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2006.890271"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305890.3305968"},{"key":"e_1_3_2_1_31_1","volume-title":"International Conference on Machine Learning. PMLR, 5062--5071","author":"Pathak Deepak","year":"2019"},{"key":"e_1_3_2_1_32_1","volume-title":"Intrinsic and extrinsic motivations: Classic definitions and new directions. Contemporary educational psychology","author":"Ryan Richard M","year":"2000"},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Seo Younggyo","year":"2021"},{"key":"e_1_3_2_1_35_1","volume-title":"Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, et al.","author":"Silver David","year":"2016"},{"key":"e_1_3_2_1_36_1","volume-title":"Curiosity and motivation. The Oxford handbook of human motivation","author":"Silvia Paul J","year":"2012"},{"key":"e_1_3_2_1_37_1","volume-title":"International Conference on Machine Learning. PMLR, 5887--5896","author":"Son Kyunghwan","year":"2019"},{"key":"e_1_3_2_1_38_1","volume-title":"Incentivizing exploration in reinforcement learning with deep predictive models. arXiv preprint arXiv:1507.00814","author":"Stadie Bradly C","year":"2015"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.5555\/3312046"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.5555\/3294996.3295035"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/1329125.1329434"},{"key":"e_1_3_2_1_43_1","volume-title":"The 8th International Conference on Learning Representations","author":"Wang Tonghan","year":"2020"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1360\/N112017-00137"}],"event":{"name":"CIKM '21: The 30th ACM International Conference on Information and Knowledge Management","location":"Virtual Event Queensland Australia","acronym":"CIKM '21","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3459637.3482326","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3459637.3482326","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:12:13Z","timestamp":1750191133000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3459637.3482326"}},"subtitle":["Curiosity and Influence-based Exploration in Multi-Agent Cooperative Scenarios with Sparse Rewards"],"short-title":[],"issued":{"date-parts":[[2021,10,26]]},"references-count":42,"alternative-id":["10.1145\/3459637.3482326","10.1145\/3459637"],"URL":"https:\/\/doi.org\/10.1145\/3459637.3482326","relation":{},"subject":[],"published":{"date-parts":[[2021,10,26]]},"assertion":[{"value":"2021-10-30","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}