{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:34:00Z","timestamp":1772120040166,"version":"3.50.1"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,10,28]],"date-time":"2023-10-28T00:00:00Z","timestamp":1698451200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,10,28]],"date-time":"2023-10-28T00:00:00Z","timestamp":1698451200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R &D Program of China","doi-asserted-by":"crossref","award":["2018YFB1308300"],"award-info":[{"award-number":["2018YFB1308300"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276028"],"award-info":[{"award-number":["62276028"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U20A20167"],"award-info":[{"award-number":["U20A20167"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Beijing Natural Science Foundation","award":["4202026"],"award-info":[{"award-number":["4202026"]}]},{"DOI":"10.13039\/501100003787","name":"Natural Science Foundation of Hebei Province","doi-asserted-by":"publisher","award":["F202103079"],"award-info":[{"award-number":["F202103079"]}],"id":[{"id":"10.13039\/501100003787","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Innovation Capability Improvement Plan Project of Hebei Province","award":["22567626H"],"award-info":[{"award-number":["22567626H"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s13042-023-01989-1","type":"journal-article","created":{"date-parts":[[2023,10,28]],"date-time":"2023-10-28T12:01:26Z","timestamp":1698494486000},"page":"1663-1675","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Consistent epistemic planning for multiagent deep reinforcement learning"],"prefix":"10.1007","volume":"15","author":[{"given":"Peiliang","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2099-7074","authenticated-orcid":false,"given":"Shicheng","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liqiang","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bingyi","family":"Mao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenbai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,10,28]]},"reference":[{"issue":"10","key":"1989_CR1","doi-asserted-by":"publisher","first-page":"5959","DOI":"10.1002\/int.22536","volume":"36","author":"A Alshehri","year":"2021","unstructured":"Alshehri A, Miller T, Sonenberg L (2021) Modeling communication of collaborative multiagent system under epistemic planning. Int J Intell Syst 36(10):5959\u20135980","journal-title":"Int J Intell Syst"},{"key":"1989_CR2","doi-asserted-by":"crossref","unstructured":"Areces C, Fervari R, Saravia AR et\u00a0al (2021) Uncertainty-based semantics for multi-agent knowing how logics. arXiv preprint arXiv:2106.11492","DOI":"10.4204\/EPTCS.335.3"},{"key":"1989_CR3","doi-asserted-by":"crossref","unstructured":"Baier C, Funke F, Majumdar R (2021) Responsibility attribution in parameterized Markovian models. In: Proceedings of the AAAI conference on artificial intelligence, pp 11734\u201311743","DOI":"10.1609\/aaai.v35i13.17395"},{"issue":"1","key":"1989_CR4","doi-asserted-by":"publisher","first-page":"9","DOI":"10.3166\/jancl.21.9-34","volume":"21","author":"T Bolander","year":"2011","unstructured":"Bolander T, Andersen MB (2011) Epistemic planning for single-and multi-agent systems. J Appl Non-Class Logics 21(1):9\u201334","journal-title":"J Appl Non-Class Logics"},{"key":"1989_CR5","doi-asserted-by":"crossref","unstructured":"Buckingham D, Kasenberg D, Scheutz M (2020) Simultaneous representation of knowledge and belief for epistemic planning with belief revision. In: Proceedings of the international conference on principles of knowledge representation and reasoning, vol 17, pp 172\u2013181","DOI":"10.24963\/kr.2020\/18"},{"issue":"7","key":"1989_CR6","doi-asserted-by":"publisher","first-page":"7032","DOI":"10.1109\/TIE.2022.3206745","volume":"70","author":"L Chen","year":"2023","unstructured":"Chen L, Wang Y, Mo Y et al (2023) Multiagent path finding using deep reinforcement learning coupled with hot supervision contrastive loss. IEEE Trans Ind Electron 70(7):7032\u20137040. https:\/\/doi.org\/10.1109\/TIE.2022.3206745","journal-title":"IEEE Trans Ind Electron"},{"key":"1989_CR7","doi-asserted-by":"crossref","unstructured":"Engesser T, Bolander T, Mattm\u00fcller R et\u00a0al (2017) Cooperative epistemic multi-agent planning for implicit coordination. arXiv preprint arXiv:1703.02196","DOI":"10.4204\/EPTCS.243.6"},{"key":"1989_CR8","first-page":"586","volume-title":"PRICAI 2021: trends in artificial intelligence","author":"F Fabiano","year":"2021","unstructured":"Fabiano F, Burigana A, Dovier A et al (2021) Multi-agent epistemic planning with inconsistent beliefs, trust and lies. In: Pham DN, Theeramunkong T, Governatori G et al (eds) PRICAI 2021: trends in artificial intelligence. Springer International Publishing, Cham, pp 586\u2013597"},{"key":"1989_CR9","unstructured":"Fabiano F, Srivastava B, Lenchner J, et\u00a0al (2021b) E-PDDL: a standardized way of defining epistemic planning problems. arXiv preprint arXiv:2107.08739"},{"key":"1989_CR10","unstructured":"Foerster J, Assael IA, De Freitas N et al (2016) Learning to communicate with deep multi-agent reinforcement learning. In: Proceedings of the 30th international conference on neural information processing systems, pp 2145\u20132153"},{"key":"1989_CR11","doi-asserted-by":"crossref","unstructured":"Geffner H, Bonet B (2013) A concise introduction to models and methods for automated planning. In: Synthesis lectures on artificial intelligence and machine learning, vol 8, no 1, pp 1\u2013141","DOI":"10.2200\/S00513ED1V01Y201306AIM022"},{"issue":"103","key":"1989_CR12","first-page":"728","volume":"309","author":"D Gurov","year":"2022","unstructured":"Gurov D, Goranko V, Lundberg E (2022) Knowledge-based strategies for multi-agent teams playing against nature. Artif Intell 309(103):728","journal-title":"Artif Intell"},{"key":"1989_CR13","unstructured":"He K, Banerjee B, Doshi P (2021) Cooperative-competitive reinforcement learning with history-dependent rewards. In: Proceedings of the 20th international conference on autonomous agents and multiagent systems, pp 602\u2013610"},{"key":"1989_CR14","doi-asserted-by":"crossref","unstructured":"Ikeda T, Shibuya T (2022) Centralized training with decentralized execution reinforcement learning for cooperative multi-agent systems with communication delay. In: 2022 61st annual conference of the Society of Instrument and Control Engineers (SICE). IEEE, pp 135\u2013140","DOI":"10.23919\/SICE56594.2022.9905866"},{"key":"1989_CR15","unstructured":"Iqbal S, Sha F (2019) Actor-attention-critic for multi-agent reinforcement learning. In: International conference on machine learning. PMLR, pp 2961\u20132970"},{"key":"1989_CR16","doi-asserted-by":"publisher","DOI":"10.1007\/s10922-022-09696-y","author":"V Jain","year":"2023","unstructured":"Jain V, Kumar B (2023) QoS-aware task offloading in fog environment using multi-agent deep reinforcement learning. J Netw Syst Manag. https:\/\/doi.org\/10.1007\/s10922-022-09696-y","journal-title":"J Netw Syst Manag"},{"key":"1989_CR17","unstructured":"Jiang J, Lu Z (2018) Learning attentional communication for multi-agent cooperation. In: Proceedings of the 32nd international conference on neural information processing systems, pp 7265\u20137275"},{"key":"1989_CR18","unstructured":"Kong X, Xin B, Liu F et\u00a0al (2017) Revisiting the master\u2013slave architecture in multi-agent deep reinforcement learning. arXiv preprint arXiv:1712.07305"},{"key":"1989_CR19","unstructured":"Lowe R, Wu YI, Tamar A et al (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. In: Proceedings of the 31st international conference on neural information processing systems, pp 6382\u20136393"},{"key":"1989_CR20","volume-title":"Exploiting relevance to improve robustness and flexibility in plan generation and execution","author":"C Muise","year":"2014","unstructured":"Muise C (2014) Exploiting relevance to improve robustness and flexibility in plan generation and execution. University of Toronto (Canada), Toronto"},{"issue":"103","key":"1989_CR21","doi-asserted-by":"publisher","first-page":"605","DOI":"10.1016\/j.artint.2021.103605","volume":"302","author":"C Muise","year":"2022","unstructured":"Muise C, Belle V, Felli P et al (2022) Efficient multi-agent epistemic planning: teaching planners about nested belief. Artif Intell 302(103):605. https:\/\/doi.org\/10.1016\/j.artint.2021.103605","journal-title":"Artif Intell"},{"key":"1989_CR22","unstructured":"Parnika P, Diddigi RB, Danda SKR et\u00a0al (2021) Attention actor-critic algorithm for multi-agent constrained co-operative reinforcement learning. In: International conference on autonomous agents and multiagent systems. International Foundation for Autonomous Agents and Multiagent Systems (IFAAMAS)"},{"issue":"3","key":"1989_CR23","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1037\/0033-2909.100.3.349","volume":"100","author":"WB Rouse","year":"1986","unstructured":"Rouse WB, Morris NM (1986) On looking into the black box: prospects and limits in the search for mental models. Psychol Bull 100(3):349","journal-title":"Psychol Bull"},{"key":"1989_CR24","first-page":"13","volume":"153","author":"T Rupprecht","year":"2022","unstructured":"Rupprecht T, Wang Y (2022) A survey for deep reinforcement learning in Markovian cyber-physical systems: common problems and solutions. Neural Netw Off J Int Neural Netw Soc 153:13\u201336","journal-title":"Neural Netw Off J Int Neural Netw Soc"},{"key":"1989_CR25","doi-asserted-by":"crossref","unstructured":"Seo S, Kennedy-Metz LR, Zenati MA et\u00a0al (2021) Towards an AI coach to infer team mental model alignment in healthcare. In: 2021 IEEE conference on cognitive and computational aspects of situation management (CogSIMA). IEEE, pp 39\u201344","DOI":"10.1109\/CogSIMA51574.2021.9475925"},{"issue":"104","key":"1989_CR26","first-page":"307","volume":"159","author":"K Shibata","year":"2023","unstructured":"Shibata K, Jimbo T, Matsubara T (2023) Deep reinforcement learning of event-triggered communication and consensus-based control for distributed cooperative transport. Robot Auton Syst 159(104):307","journal-title":"Robot Auton Syst"},{"key":"1989_CR27","doi-asserted-by":"crossref","unstructured":"Singh R, Sonenberg L, Miller T (2017) Communication and shared mental models for teams performing interdependent tasks. In: Coordination, organizations, institutions, and norms in agent systems XII: COIN 2016 international workshops, COIN@ AAMAS, Singapore, Singapore, May 9, 2016, COIN@ ECAI, The Hague, The Netherlands, August 30, 2016, Revised Selected Papers. Springer, pp 81\u201397","DOI":"10.1007\/978-3-319-46882-2_10"},{"key":"1989_CR28","doi-asserted-by":"crossref","unstructured":"Ulusoy A, Smith SL, Ding XC et\u00a0al (2011) Optimal multi-robot path planning with temporal logic constraints. In: 2011 IEEE\/RSJ international conference on intelligent robots and systems. IEEE, pp 3087\u20133092","DOI":"10.1109\/IROS.2011.6048524"},{"issue":"103","key":"1989_CR29","doi-asserted-by":"publisher","first-page":"562","DOI":"10.1016\/j.artint.2021.103562","volume":"301","author":"H Wan","year":"2021","unstructured":"Wan H, Fang B, Liu Y (2021) A general multi-agent epistemic planner based on higher-order belief change. Artif Intell 301(103):562. https:\/\/doi.org\/10.1016\/j.artint.2021.103562","journal-title":"Artif Intell"},{"key":"1989_CR30","doi-asserted-by":"crossref","unstructured":"Wu J, Sun X, Zeng A et\u00a0al (2021) Spatial intention maps for multi-agent mobile manipulation. In: 2021 IEEE international conference on robotics and automation (ICRA). IEEE, pp 8749\u20138756","DOI":"10.1109\/ICRA48506.2021.9561359"},{"key":"1989_CR31","doi-asserted-by":"publisher","DOI":"10.3390\/math11020405","author":"Y Xu","year":"2023","unstructured":"Xu Y, Wei Y, Jiang K et al (2023) Multiple UAVs path planning based on deep reinforcement learning in communication denial environment. Mathematics. https:\/\/doi.org\/10.3390\/math11020405","journal-title":"Mathematics"},{"key":"1989_CR32","unstructured":"Yang T, Tang H, Bai C et\u00a0al (2021) Exploration in deep reinforcement learning: a comprehensive survey. arXiv preprint arXiv:2109.06668"},{"key":"1989_CR33","unstructured":"Yu C, Velu A, Vinitsky E et\u00a0al (2021) The surprising effectiveness of PPO in cooperative, multi-agent games. arXiv preprint arXiv:2103.01955"},{"key":"1989_CR34","doi-asserted-by":"crossref","unstructured":"Zhou Y (2021) Ideology, censorship, and propaganda: unifying shared mental models. Available at SSRN 3821161","DOI":"10.2139\/ssrn.3821161"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-023-01989-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-023-01989-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-023-01989-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T10:20:35Z","timestamp":1712917235000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-023-01989-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,28]]},"references-count":34,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["1989"],"URL":"https:\/\/doi.org\/10.1007\/s13042-023-01989-1","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-2576428\/v1","asserted-by":"object"},{"id-type":"doi","id":"10.21203\/rs.3.rs-2576428\/v2","asserted-by":"object"}]},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,10,28]]},"assertion":[{"value":"20 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 September 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they do not have any commercial or associative interest that represents a conflict of interest in connection with the work submitted.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}