{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T16:13:55Z","timestamp":1778602435540,"version":"3.51.4"},"publisher-location":"Singapore","reference-count":21,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819698936","type":"print"},{"value":"9789819698943","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9894-3_1","type":"book-chapter","created":{"date-parts":[[2025,7,25]],"date-time":"2025-07-25T19:56:33Z","timestamp":1753473393000},"page":"3-14","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["MACMPE: Exploration Framework for Multi-agent Reinforcement Learning via Causal Episodic Memory and Potential Evolution"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8053-7859","authenticated-orcid":false,"given":"Liqiang","family":"Tian","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1228-2757","authenticated-orcid":false,"given":"Peiliang","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2867-7512","authenticated-orcid":false,"given":"Qian","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6547-5150","authenticated-orcid":false,"given":"Bingyi","family":"Mao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7683-2776","authenticated-orcid":false,"given":"Wenbai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,26]]},"reference":[{"key":"1_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112103","volume":"299","author":"J Kim","year":"2024","unstructured":"Kim, J., Lee, Y.J., Kwak, M., Park, Y., Kim, S.B.: DynaSTI: Dynamics modeling with sequential temporal information for reinforcement learning in Atari. Knowl. Based Syst. 299, 112103 (2024)","journal-title":"Knowl. Based Syst."},{"key":"1_CR2","first-page":"501","volume-title":"ICRA","author":"C Zhao","year":"2024","unstructured":"Zhao, C., Jie, X., Peng, R., Chen, X., Mei, K., Lan, X.: Experience consistency distillation continual reinforcement learning for robotic manipulation tasks. In: ICRA, pp. 501\u2013507 (2024)"},{"key":"1_CR3","first-page":"9220","volume-title":"ICRA","author":"H Qian","year":"2024","unstructured":"Qian, H., et al.: Leveraging the efficiency of multi-task robot manipulation via task-evoked planner and reinforcement learning. In: ICRA, pp. 9220\u20139226 (2024)"},{"key":"1_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2023.111154","volume":"151","author":"C-L Cheng","year":"2024","unstructured":"Cheng, C.-L., Hsu, C.-C.J., Saeedvand, S., Jo, J.H.: Multi-objective crowd-aware robot navigation system using deep reinforcement learning. Appl. Soft Comput. 151, 111154 (2024)","journal-title":"Appl. Soft Comput."},{"key":"1_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.124906","volume":"256","author":"Z Cui","year":"2024","unstructured":"Cui, Z., Guan, W., Zhang, X.: USV formation navigation decision-making through hybrid deep reinforcement learning using self-attention mechanism. Expert Syst. Appl. 256, 124906 (2024)","journal-title":"Expert Syst. Appl."},{"issue":"2","key":"1_CR6","doi-asserted-by":"publisher","first-page":"895","DOI":"10.1007\/s10462-021-09996-w","volume":"55","author":"S Gronauer","year":"2022","unstructured":"Gronauer, S., Diepold, K.: Multi-agent deep reinforcement learning: A survey. Artif. Intell. Rev. 55(2), 895\u2013943 (2022)","journal-title":"Artif. Intell. Rev."},{"key":"1_CR7","first-page":"6131","volume-title":"ICML","author":"K Lee","year":"2021","unstructured":"Lee, K., Laskin, M., Srinivas, A., Abbeel, P.: SUNRISE: A simple unified framework for ensemble learning in deep reinforcement learning. In: ICML, pp. 6131\u20136141 (2021)"},{"key":"1_CR8","volume-title":"ICLR","author":"J Wang","year":"2021","unstructured":"Wang, J., Ren, Z., Liu, T., Yu, Y., Zhang, C.: QPLEX: Duplex dueling multi-agent q-learning. In: ICLR (2021)"},{"key":"1_CR9","first-page":"6260","volume-title":"IROS","author":"Z Zhu","year":"2020","unstructured":"Zhu, Z., Biyik, E., Sadigh, D.: Multi-agent safe planning with gaussian processes. In: IROS, pp. 6260\u20136267 (2020)"},{"key":"1_CR10","volume-title":"ICLR","author":"R Chitnis","year":"2020","unstructured":"Chitnis, R., Tulsiani, S., Gupta, S., Gupta, A.: Intrinsic motivation for encouraging synergistic behavior. In: ICLR (2020)"},{"key":"1_CR11","first-page":"4380","volume-title":"ICML","author":"H Hao","year":"2021","unstructured":"Hao, H., Ye, J., Zhu, G., Ren, Z., Zhang, C.: Generalizable episodic memory for deep reinforcement learning. In: ICML, pp. 4380\u20134390 (2021)"},{"key":"1_CR12","volume-title":"ICLR","author":"H Na","year":"2024","unstructured":"Na, H., Seo, Y., Moon, I.-C.: Efficient episodic memory utilization of cooperative multi-agent reinforcement learning. In: ICLR (2024)"},{"key":"1_CR13","volume-title":"ICLR","author":"J Hao","year":"2023","unstructured":"Hao, J., Li, P., Tang, H., Zheng, Y., Xian, F., Meng, Z.: ERL-Re$2$: Efficient evolutionary reinforcement learning with shared state representation and individual policy representation. In: ICLR (2023)"},{"key":"1_CR14","first-page":"12979","volume-title":"ICML","author":"P Li","year":"2022","unstructured":"Li, P., et al.: PMIC: Improving multi-agent reinforcement learning with progressive mutual information collaboration. In: ICML, pp. 12979\u201312997 (2022)"},{"key":"1_CR15","first-page":"2961","volume-title":"ICML","author":"S Iqbal","year":"2019","unstructured":"Iqbal, S., Sha, F.: Actor-attention-critic for multi-agent reinforcement learning. In: ICML, pp. 2961\u20132970 (2019)"},{"key":"1_CR16","first-page":"6379","volume-title":"NIPS","author":"R Lowe","year":"2017","unstructured":"Lowe, R., Yi, W., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: NIPS, pp. 6379\u20136390 (2017)"},{"key":"1_CR17","first-page":"24611","volume-title":"NeurIPS","author":"Y Chao","year":"2022","unstructured":"Chao, Y., Velu, A., Vinitsky, E., Jiaxuan Gao, Y., Wang, A.M., Bayen, Y.W.: The surprising effectiveness of ppo in cooperative multi-agent games. In: NeurIPS, pp. 24611\u201324624 (2022)"},{"key":"1_CR18","volume-title":"NeurIPS","author":"M Wen","year":"2022","unstructured":"Wen, M., et al.: Multi-agent reinforcement learning is a sequence modeling problem. In: NeurIPS (2022)"},{"key":"1_CR19","first-page":"9260","volume-title":"ICML","author":"B Huang","year":"2022","unstructured":"Huang, B., et al.: Action-sufficient state representation learning for control with structural constraints. In: ICML, pp. 9260\u20139279 (2022)"},{"key":"1_CR20","first-page":"1225","volume":"12","author":"S Shimizu","year":"2011","unstructured":"Shimizu, S., et al.: DirectLiNGAM: A direct method for learning a linear non-gaussian structural equation model. J. Mach. Learn. Res. 12, 1225\u20131248 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"1_CR21","first-page":"1616","volume-title":"AAMAS","author":"P Parnika","year":"2021","unstructured":"Parnika, P., Diddigi, R.B., Danda, S.K.R., Bhatnagar, S.: Attention actor-critic algorithm for multi-agent constrained co-operative reinforcement learning. In: AAMAS, pp. 1616\u20131618 (2021)"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9894-3_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T01:37:32Z","timestamp":1774661852000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9894-3_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819698936","9789819698943"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9894-3_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"26 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}