{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T18:27:06Z","timestamp":1742927226605,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":26,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819970186"},{"type":"electronic","value":"9789819970193"}],"license":[{"start":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T00:00:00Z","timestamp":1699574400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T00:00:00Z","timestamp":1699574400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-7019-3_13","type":"book-chapter","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T00:02:57Z","timestamp":1699574577000},"page":"125-136","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Diverse Policies Converge in\u00a0Reward-Free Markov Decision Processes"],"prefix":"10.1007","author":[{"given":"Fanqi","family":"Lin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0500-0141","authenticated-orcid":false,"given":"Shiyu","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei-Wei","family":"Tu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,10]]},"reference":[{"issue":"Nov","key":"13_CR1","first-page":"397","volume":"3","author":"P Auer","year":"2002","unstructured":"Auer, P.: Using confidence bounds for exploitation-exploration trade-offs. J. Mach. Learn. Res. 3(Nov), 397\u2013422 (2002)","journal-title":"J. Mach. Learn. Res."},{"issue":"2","key":"13_CR2","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1023\/A:1013689704352","volume":"47","author":"P Auer","year":"2002","unstructured":"Auer, P., Cesa-Bianchi, N., Fischer, P.: Finite-time analysis of the multiarmed bandit problem. Mach. Learn. 47(2), 235\u2013256 (2002)","journal-title":"Mach. Learn."},{"key":"13_CR3","unstructured":"Berner, C., et al.: Dota 2 with large scale deep reinforcement learning. arXiv preprint arXiv:1912.06680 (2019)"},{"key":"13_CR4","unstructured":"Chen, W., Huang, S., Chiang, Y., Chen, T., Zhu, J.: DGPO: discovering multiple strategies with diversity-guided policy optimization. In: Proceedings of the 2023 International Conference on Autonomous Agents and Multiagent Systems, pp. 2634\u20132636 (2023)"},{"key":"13_CR5","unstructured":"Chu, W., Li, L., Reyzin, L., Schapire, R.: Contextual bandits with linear payoff functions. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 208\u2013214. JMLR Workshop and Conference Proceedings (2011)"},{"key":"13_CR6","unstructured":"Ellis, B., et al.: SMACv2: an improved benchmark for cooperative multi-agent reinforcement learning. arXiv preprint arXiv:2212.07489 (2022)"},{"key":"13_CR7","unstructured":"Eysenbach, B., Gupta, A., Ibarz, J., Levine, S.: Diversity is all you need: learning skills without a reward function. In: International Conference on Learning Representations (2018)"},{"key":"13_CR8","unstructured":"Eysenbach, B., Salakhutdinov, R., Levine, S.: The information geometry of unsupervised reinforcement learning. In: International Conference on Learning Representations (2021)"},{"key":"13_CR9","unstructured":"Fu, W., Du, W., Li, J., Chen, S., Zhang, J., Wu, Y.: Iteratively learning novel strategies with diversity measured in state distances. Submitted to ICLR 2023 (2022)"},{"key":"13_CR10","unstructured":"Huang, S., et al.: Tikick: towards playing multi-agent football full games from single-agent demonstrations. arXiv preprint arXiv:2110.04507 (2021)"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Huang, S., et al.: VMAPD: generate diverse solutions for multi-agent games with recurrent trajectory discriminators. In: 2022 IEEE Conference on Games (CoG), pp. 9\u201316. IEEE (2022)","DOI":"10.1109\/CoG51982.2022.9893722"},{"key":"13_CR12","first-page":"8198","volume":"33","author":"S Kumar","year":"2020","unstructured":"Kumar, S., Kumar, A., Levine, S., Finn, C.: One solution is not all you need: few-shot extrapolation via structured maxent RL. Adv. Neural. Inf. Process. Syst. 33, 8198\u20138210 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR13","unstructured":"Lanctot, M., et al.: A unified game-theoretic approach to multiagent reinforcement learning. In: Advances in neural information processing systems, vol. 30 (2017)"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Li, L., Chu, W., Langford, J., Schapire, R.E.: A contextual-bandit approach to personalized news article recommendation. In: Proceedings of the 19th International Conference on World Wide Web, pp. 661\u2013670 (2010)","DOI":"10.1145\/1772690.1772758"},{"key":"13_CR15","unstructured":"Liu, X., et al.: Unifying behavioral and response diversity for open-ended learning in zero-sum games. arXiv preprint arXiv:2106.04958 (2021)"},{"key":"13_CR16","unstructured":"Mahajan, A., Rashid, T., Samvelyan, M., Whiteson, S.: Maven: multi-agent variational exploration. arXiv preprint arXiv:1910.07483 (2019)"},{"key":"13_CR17","unstructured":"Makoviychuk, V., et al.: Isaac gym: high performance GPU-based physics simulation for robot learning. arXiv preprint arXiv:2108.10470 (2021)"},{"key":"13_CR18","first-page":"2069","volume":"13","author":"BC May","year":"2012","unstructured":"May, B.C., Korda, N., Lee, A., Leslie, D.S.: Optimistic bayesian sampling in contextual-bandit problems. J. Mach. Learn. Res. 13, 2069\u20132106 (2012)","journal-title":"J. Mach. Learn. Res."},{"key":"13_CR19","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1016\/j.neunet.2022.04.009","volume":"152","author":"T Osa","year":"2022","unstructured":"Osa, T., Tangkaratt, V., Sugiyama, M.: Discovering diverse solutions in deep reinforcement learning by maximizing state-action-based mutual information. Neural Netw. 152, 90\u2013104 (2022)","journal-title":"Neural Netw."},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Shi, J.C., Yu, Y., Da, Q., Chen, S.Y., Zeng, A.X.: Virtual-taobao: virtualizing real-world online retail environment for reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 4902\u20134909 (2019)","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"13_CR21","unstructured":"Wang, T.T., et al.: Adversarial policies beat professional-level go AIs. arXiv preprint arXiv:2211.00241 (2022)"},{"key":"13_CR22","unstructured":"Watkins, C.J.C.H.: Learning from delayed rewards. Robot. Auton. Syst. (1989)"},{"key":"13_CR23","unstructured":"Xue, W., Cai, Q., Zhan, R., Zheng, D., Jiang, P., An, B.: ResAct: Reinforcing long-term engagement in sequential recommendation with residual actor. arXiv preprint arXiv:2206.02620 (2022)"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"Yu, C., Yang, X., Gao, J., Yang, H., Wang, Y., Wu, Y.: Learning efficient multi-agent cooperative visual exploration. arXiv preprint arXiv:2110.05734 (2021)","DOI":"10.1007\/978-3-031-19842-7_29"},{"key":"13_CR25","unstructured":"Zahavy, T., O\u2019Donoghue, B., Barreto, A., Flennerhag, S., Mnih, V., Singh, S.: Discovering diverse nearly optimal policies with successor features. In: ICML 2021 Workshop on Unsupervised Reinforcement Learning (2021)"},{"key":"13_CR26","unstructured":"Zhou, Z., Fu, W., Zhang, B., Wu, Y.: Continuously discovering novel strategies via reward-switching policy optimization. In: International Conference on Learning Representations (2021)"}],"container-title":["Lecture Notes in Computer Science","PRICAI 2023: Trends in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-7019-3_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T00:15:53Z","timestamp":1699575353000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-7019-3_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,10]]},"ISBN":["9789819970186","9789819970193"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-7019-3_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,11,10]]},"assertion":[{"value":"10 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific Rim International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jakarta","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Indonesia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pricai2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.pricai.org\/2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"422","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"95","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"23% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.1","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}