{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T14:09:31Z","timestamp":1772114971786,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":47,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819755806","type":"print"},{"value":"9789819755813","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5581-3_32","type":"book-chapter","created":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T19:02:53Z","timestamp":1722538973000},"page":"396-406","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient Reinforcement Learning via Decoupling Exploration and 
Utilization"],"prefix":"10.1007","author":[{"given":"Jingpu","family":"Yang","sequence":"first","affiliation":[]},{"given":"Helin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Qirui","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Zhecheng","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Zirui","family":"Song","sequence":"additional","affiliation":[]},{"given":"Miao","family":"Fang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,1]]},"reference":[{"key":"32_CR1","doi-asserted-by":"publisher","first-page":"28573","DOI":"10.1109\/ACCESS.2018.2831228","volume":"6","author":"A Dorri","year":"2018","unstructured":"Dorri, A., Kanhere, S.S., Jurdak, R.: Multi-agent systems: a survey. IEEE Access 6, 28573\u201328593 (2018)","journal-title":"IEEE Access"},{"key":"32_CR2","doi-asserted-by":"crossref","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","DOI":"10.1038\/nature14236"},{"key":"32_CR3","unstructured":"Badia, A.P., et al.: Agent57: Outperforming the Atari human benchmark. In: International Conference on Machine Learning, pp. 507\u2013517. PMLR (2020)"},{"key":"32_CR4","doi-asserted-by":"crossref","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","DOI":"10.1038\/nature16961"},{"key":"32_CR5","doi-asserted-by":"crossref","unstructured":"Yuan, L., et al.: Learning to coordinate with anyone. In: Proceedings of the Fifth International Conference on Distributed Artificial Intelligence,\u00a0pp. 1\u20139, November 2023","DOI":"10.1145\/3627676.3627678"},{"key":"32_CR6","doi-asserted-by":"crossref","unstructured":"Yuan, L., et al.: Multi-agent incentive communication via decentralized teammate modeling. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 
9466\u20139474 (2022)","DOI":"10.1609\/aaai.v36i9.21179"},{"key":"32_CR7","unstructured":"Zhang, F., Jia, C., Li, Y.C., Yuan, L., Yu, Y, Zhang, Z.: Discovering generalizable multi-agent coordination skills from multi-task offline data. In: The Eleventh International Conference on Learning Representations. IEEE (2022)"},{"key":"32_CR8","unstructured":"Yuan, L., Zhang, Z., Li, L., Guan, C, Yu, Y.: A survey of progress on cooperative multi-agent reinforcement learning in open environment. arXiv preprint arXiv:2312.01058 (2023)"},{"key":"32_CR9","unstructured":"Yuan, L., Jiang, T., Li, L., Chen, F., Zhang, Z, Yu, Y.: Robust multi-agent communication via multi-view message certification. arXiv preprint arXiv:2305.13936 (2023)"},{"key":"32_CR10","unstructured":"Chen, X., Li, S., Li, H., Jiang, S., Qi, Y, Song, L.: Generative adversarial user model for reinforcement learning based recommendation system. In: International Conference on Machine Learning, pp. 1052\u20131061. PMLR (2019)"},{"key":"32_CR11","unstructured":"Sestini, A., Kuhnle, A., Bagdanov, A.D.: DeepCrawl: deep reinforcement learning for turn-based strategy games.\u00a0arXiv preprint arXiv:2012.01914 (2020)"},{"key":"32_CR12","doi-asserted-by":"crossref","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017)","DOI":"10.1038\/nature24270"},{"key":"32_CR13","unstructured":"Thrun, S., Schwartz, A.: Issues in using function approximation for reinforcement learning. In: Proceedings of the 1993 Connectionist Models Summer School, pp. 255\u2013263. Psychology Press, March 2014"},{"key":"32_CR14","first-page":"20132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto, S., Gu, S.S.: A minimalist approach to offline reinforcement learning. Adv. Neural. Inf. Process. Syst. 34, 20132\u201320145 (2021)","journal-title":"Adv. Neural. Inf. Process. 
Syst."},{"key":"32_CR15","unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, pp. 1587\u20131596. PMLR (2018)"},{"key":"32_CR16","unstructured":"Ciosek, K., Vuong, Q., Loftin, R., Hofmann, K.: Better exploration with optimistic actor-critic. preprint arXiv: Machine Learning (2019)"},{"key":"32_CR17","doi-asserted-by":"crossref","unstructured":"Lobel, S., Gottesman, O., Allen, C., Bagaria, A., Konidaris, G.: Optimistic initialization for exploration in continuous control. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 7612\u20137619 (2022)","DOI":"10.1609\/aaai.v36i7.20727"},{"key":"32_CR18","unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. preprint arXiv:1802.09477(2018)"},{"key":"32_CR19","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"32_CR20","doi-asserted-by":"crossref","unstructured":"Hu, Y., Geng, J., Wang, C., Keller, J, Scherer, S.: Off-policy evaluation with online adaptation for robot exploration in challenging environments.\u00a0IEEE Robot. Autom. Lett. (2023)","DOI":"10.1109\/LRA.2023.3271520"},{"key":"32_CR21","unstructured":"Shalev-Shwartz, S., Shammah, S, Shashua, A.: Safe, multi-agent, reinforcement learning for autonomous driving.\u00a0arXiv preprint arXiv:1610.03295 (2016)"},{"key":"32_CR22","unstructured":"Osband, I., Blundell, C., Pritzel, A., Van Roy, B.: Deep exploration via bootstrapped DQN. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"32_CR23","unstructured":"Dewey, D.: Reinforcement learning and the reward engineering principle. 
In: 2014 AAAI Spring Symposium Series (2014)"},{"issue":"3","key":"32_CR24","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1016\/0005-1098(89)90002-2","volume":"25","author":"CE Garcia","year":"1989","unstructured":"Garcia, C.E., Prett, D.M., Morari, M.: Model predictive control: theory and practice\u2014a survey. Automatica 25(3), 335\u2013348 (1989)","journal-title":"Automatica"},{"key":"32_CR25","doi-asserted-by":"crossref","unstructured":"Yu, Y.: Towards sample efficient reinforcement learning. In: IJCAI, pp. 5739\u20135743 (2018)","DOI":"10.24963\/ijcai.2018\/820"},{"key":"32_CR26","unstructured":"Kurutach, T., Clavera, I., Duan, Y., Tamar, A., Abbeel, P.: Model-ensemble trust-region policy optimization. arXiv preprint arXiv:1802.10592 (2018)"},{"key":"32_CR27","unstructured":"Finn, C., Abbeel, P., Levine, S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: International Conference on Machine Learning, pp. 1126\u20131135. PMLR (2017)"},{"key":"32_CR28","first-page":"12849","volume":"34","author":"T Moskovitz","year":"2021","unstructured":"Moskovitz, T., Parker-Holder, J., Pacchiano, A., Arbel, M., Jordan, M.: Tactical optimism and pessimism for deep reinforcement learning. Adv. Neural. Inf. Process. Syst. 34, 12849\u201312863 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"32_CR29","unstructured":"Li, F., et al.: Improving exploration in actor\u2013critic with weakly pessimistic value estimation and optimistic policy optimization. IEEE Trans. Neural Netw. Learn. Syst. (2022)"},{"issue":"4\u20135","key":"32_CR30","doi-asserted-by":"publisher","first-page":"698","DOI":"10.1177\/0278364920987859","volume":"40","author":"J Ibarz","year":"2021","unstructured":"Ibarz, J., Tan, J., Finn, C., Kalakrishnan, M., Pastor, P., Levine, S.: How to train your robot with deep reinforcement learning: lessons we have learned. Int. J. Robot. Res. 40(4\u20135), 698\u2013721 (2021)","journal-title":"Int. J. Robot. 
Res."},{"key":"32_CR31","doi-asserted-by":"publisher","first-page":"107744","DOI":"10.1016\/j.ijepes.2021.107744","volume":"136","author":"Y Zhang","year":"2022","unstructured":"Zhang, Y., Shi, X., Zhang, H., Cao, Y., Terzija, V.: Review on deep learning applications in frequency analysis and control of modern power system. Int. J. Electr. Power Energy Syst. 136, 107744 (2022)","journal-title":"Int. J. Electr. Power Energy Syst."},{"key":"32_CR32","doi-asserted-by":"crossref","unstructured":"Stadler, M., Banfi, J., Roy, N.: Approximating the value of collaborative team actions for efficient multiagent navigation in uncertain graphs. In: Proceedings of the International Conference on Automated Planning and Scheduling, pp. 677\u2013685 (2023)","DOI":"10.1609\/icaps.v33i1.27250"},{"key":"32_CR33","doi-asserted-by":"crossref","unstructured":"Bruckner, R., Heekeren, H.R., Ostwald, D.: Belief states and categorical-choice biases determine reward-based learning under perceptual uncertainty, pp. 2020-09. bioRxiv (2020)","DOI":"10.1101\/2020.09.18.303495"},{"key":"32_CR34","unstructured":"Ciosek, K., Vuong, Q., Loftin, R., Hofmann, K.: Better exploration with optimistic actor critic. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"32_CR35","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning.\u00a0arXiv preprint arXiv:1509.02971 (2015)"},{"key":"32_CR36","unstructured":"Hasselt, H.: Double Q-learning. In: Advances in Neural Information Processing Systems, vol. 23 (2010)"},{"key":"32_CR37","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937. PMLR (2016)"},{"key":"32_CR38","unstructured":"Houthooft, R., Chen, X., Duan, Y., Schulman, J., De Turck, F., Abbeel, P.: VIME: variational information maximizing exploration. In: Advances in Neural Information Processing Systems, vol. 
29 (2016)"},{"key":"32_CR39","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: International Conference on Machine Learning, pp. 1889\u20131897. PMLR (2015)"},{"key":"32_CR40","unstructured":"Tassa, Y., et al.: Deepmind control suite.\u00a0arXiv preprint arXiv:1801.00690 (2018)"},{"key":"32_CR41","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms.\u00a0arXiv preprint arXiv:1707.06347 (2017)"},{"key":"32_CR42","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. preprint arXiv: Learning (2017)"},{"key":"32_CR43","unstructured":"Burda, Y., Edwards, H., Storkey, A., Klimov, O.: Exploration by random network distillation.\u00a0arXiv preprint arXiv:1810.12894 (2018)"},{"key":"32_CR44","unstructured":"Pathak, D., Gandhi, D., Gupta, A.: Self-supervised exploration via disagreement. In: International Conference on Machine Learning, pp. 5062\u20135071. PMLR (2019)"},{"key":"32_CR45","unstructured":"Chen, X., Wang, C., Zhou, Z., Ross, K.: Randomized ensembled double Q-learning: Learning fast without a model.\u00a0arXiv preprint arXiv:2101.05982 (2021)"},{"key":"32_CR46","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: MuJoCo: a physics engine for model-based control. In: Proceedings of the 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033 (2012)","DOI":"10.1109\/IROS.2012.6386109"},{"key":"32_CR47","unstructured":"Brockman, G., et al.: OpenAI gym. 
arXiv preprint arXiv:1606.01540 (2016)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5581-3_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,1]],"date-time":"2024-08-01T19:18:26Z","timestamp":1722539906000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5581-3_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819755806","9789819755813"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5581-3_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"1 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 August 2024","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2024\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}