{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T03:21:51Z","timestamp":1775186511988,"version":"3.50.1"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030640958","type":"print"},{"value":"9783030640965","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-64096-5_5","type":"book-chapter","created":{"date-parts":[[2020,11,25]],"date-time":"2020-11-25T00:30:27Z","timestamp":1606264227000},"page":"55-68","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Lyapunov-Based Reinforcement Learning for Decentralized Multi-agent Control"],"prefix":"10.1007","author":[{"given":"Qingrui","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Hao","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Pan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,25]]},"reference":[{"key":"5_CR1","unstructured":"Ackermann, J., Gabler, V., Osa, T., Sugiyama, M.: Reducing overestimation bias in multi-agent domains using double centralized critics. arXiv preprint arXiv:1910.01465 (2019)"},{"issue":"1","key":"5_CR2","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364919887447","volume":"30","author":"M Andrychowicz","year":"2020","unstructured":"Andrychowicz, M., et al.: Learning dexterous in-hand manipulation. Int. J. Robot. Res. 30(1), 3\u201320 (2020). https:\/\/doi.org\/10.1177\/0278364919887447","journal-title":"Int. J. Robot. Res."},{"key":"5_CR3","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1016\/j.arcontrol.2008.03.004","volume":"32","author":"L Bakule","year":"2008","unstructured":"Bakule, L.: Decentralized control: an overview. Annu. Rev. Control 32, 87\u201398 (2008). https:\/\/doi.org\/10.1016\/j.arcontrol.2008.03.004","journal-title":"Annu. Rev. Control"},{"key":"5_CR4","doi-asserted-by":"publisher","unstructured":"van den Berg, J., Lin, M.C., Manocha, D.: Reciprocal velocity obstacles for real-time multi-agent navigation. In: Proceedings of the IEEE International Conference on Robotics and Automation (ICRA), Pasadena, CA, USA. IEEE, May 2008. https:\/\/doi.org\/10.1109\/ROBOT.2008.4543489","DOI":"10.1109\/ROBOT.2008.4543489"},{"key":"5_CR5","unstructured":"Berkenkamp, F., Turchetta, M., Schoellig, A.P., Krause, A.: Safe model-based reinforcement learning with stability guarantees. arXiv preprint arXiv:1705.08551 (2017)"},{"issue":"1","key":"5_CR6","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1049\/ip-sen:19971023","volume":"144","author":"B Burmeister","year":"1997","unstructured":"Burmeister, B., Haddadi, A., Matylis, G.: Application of multi-agent systems in traffic and transportation. IEE Proc. Softw. Eng. 144(1), 51\u201360 (1997). https:\/\/doi.org\/10.1049\/ip-sen:19971023","journal-title":"IEE Proc. Softw. Eng."},{"key":"5_CR7","doi-asserted-by":"publisher","unstructured":"Chen, Y.F., Everett, M., Liu, M., How, J.P.: Socially aware motion planning with deep reinforcement learning. In: Proceedings of 2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), Vancouver, BC, Canada, September 2017. https:\/\/doi.org\/10.1109\/IROS.2017.8202312","DOI":"10.1109\/IROS.2017.8202312"},{"key":"5_CR8","doi-asserted-by":"publisher","unstructured":"Chen, Y.F., Liu, M., Everett, M., How, J.P.: Decentralized non-communicating multiagent collision avoidance with deep reinforcement learning. In: Proceedings of 2017 IEEE International Conference on Robotics and Automation, Singapore, Singapore, June 2017. https:\/\/doi.org\/10.1109\/ICRA.2017.7989037","DOI":"10.1109\/ICRA.2017.7989037"},{"issue":"3","key":"5_CR9","doi-asserted-by":"publisher","first-page":"825","DOI":"10.1109\/TCSI.2014.2367575","volume":"62","author":"Z Cheng","year":"2015","unstructured":"Cheng, Z., Zhang, H.T., Fan, M.C., Chen, G.: Distributed consensus of multi-agent systems with input constraints: a model predictive control approach. IEEE Trans. Circuits Syst. I Regul. Paper 62(3), 825\u2013834 (2015). https:\/\/doi.org\/10.1109\/TCSI.2014.2367575","journal-title":"IEEE Trans. Circuits Syst. I Regul. Paper"},{"key":"5_CR10","unstructured":"Chow, Y., Nachum, O., Duenez-Guzman, E., Ghavamzadeh, M.: A Lyapunov-based approach to safe reinforcement learning. arXiv preprint arXiv:1805.07708 (2018)"},{"key":"5_CR11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-53622-3","volume-title":"Graph Theory","author":"R Diestel","year":"2000","unstructured":"Diestel, R.: Graph Theory, 2nd edn. Springer, New York (2000). https:\/\/doi.org\/10.1007\/978-3-662-53622-3","edition":"2"},{"key":"5_CR12","unstructured":"Everett, M., Chen, Y.F., How, J.P.: Collision avoidance in pedestrian-rich environments with deep reinforcement learning. arXiv preprint arXiv:1910.11689 (2019)"},{"issue":"5","key":"5_CR13","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1109\/TRA.2002.803466","volume":"18","author":"JT Feddema","year":"2002","unstructured":"Feddema, J.T., Lewis, C., Schoenwald, D.A.: Decentralized control of cooperative robotic vehicles: theory and application. IEEE Trans. Robot. Autom. 18(5), 852\u2013864 (2002). https:\/\/doi.org\/10.1109\/TRA.2002.803466","journal-title":"IEEE Trans. Robot. Autom."},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Foerster, J.N., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. arXiv preprint arXiv:1705.08926 (2017)","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"5_CR15","unstructured":"Fujimoto, S., van Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. arXiv preprint arXiv:1802.09477 (2018)"},{"key":"5_CR16","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-0163-9","volume-title":"Algebraic Graph Theory","author":"C Godsil","year":"2000","unstructured":"Godsil, C., Royle, G.: Algebraic Graph Theory. Springer, New York (2000). https:\/\/doi.org\/10.1007\/978-1-4613-0163-9"},{"issue":"9","key":"5_CR17","doi-asserted-by":"publisher","first-page":"1275","DOI":"10.1016\/S0005-1098(00)00038-8","volume":"36","author":"Y Guo","year":"2000","unstructured":"Guo, Y., Hill, D.J., Wang, Y.: Nonlinear decentralized control of large-scale power systems. Automatica 36(9), 1275\u20131289 (2000). https:\/\/doi.org\/10.1016\/S0005-1098(00)00038-8","journal-title":"Automatica"},{"key":"5_CR18","unstructured":"Haarnoja, T., et al.: Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018)"},{"key":"5_CR19","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. arXiv preprint arXiv:1801.01290 (2018)"},{"key":"5_CR20","unstructured":"Han, M., Tian, Y., Zhang, L., Wang, J., Pan, W.: $$h_{\\infty }$$ model-free reinforcement learning with robust stability guarantee. arXiv preprint arXiv:1911.02875 (2019)"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. arXiv preprint arXiv:1509.06461 (2015)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"5_CR22","unstructured":"Iqbal, S., Sha, F.: Actor-attention-critic for multi-agent reinforcement learning. arXiv preprint arXiv:1810.0291 (2019)"},{"key":"5_CR23","doi-asserted-by":"publisher","first-page":"2105","DOI":"10.1016\/j.automatica.2006.07.008","volume":"42","author":"T Keviczky","year":"2006","unstructured":"Keviczky, T., Borrelli, F., Balas, G.J.: Decentralized receding horizon control for large scale dynamically decoupled systems. Automatica 42, 2105\u20132115 (2006). https:\/\/doi.org\/10.1016\/j.automatica.2006.07.008","journal-title":"Automatica"},{"key":"5_CR24","volume-title":"Nonlinear Systems","author":"HK Khalil","year":"2001","unstructured":"Khalil, H.K.: Nonlinear Systems, 3rd edn. Prentice Hall, Upper Saddle River (2001)","edition":"3"},{"key":"5_CR25","unstructured":"Kiran, B.R., et al.: Deep reinforcement learning for autonomous driving: a survey. arXiv preprint arXiv:2002.00444 (2020)"},{"key":"5_CR26","unstructured":"Levine, S.: Reinforcement learning and control as probabilistic inference: tutorial and review. arXiv preprint arXiv:1805.00909 (2018)"},{"key":"5_CR27","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"5_CR28","doi-asserted-by":"publisher","unstructured":"Lin, J., Morse, A., Anderson, B.: Lenient learners in cooperative multiagent systems. In: Proceedings of 42nd IEEE International Conference on Decision and Control (IEEE Cat. No. 03CH37475), Maui, HI, USA, December 2003. https:\/\/doi.org\/10.1109\/CDC.2003.1272825","DOI":"10.1109\/CDC.2003.1272825"},{"key":"5_CR29","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. arXiv preprint arXiv:1706.02275 (2018)"},{"key":"5_CR30","unstructured":"Kalashnikov, D.: QT-Opt: scalable deep reinforcement learning for vision-based robotic manipulation. arXiv preprint arXiv:1806.10293 (2018)"},{"issue":"6","key":"5_CR31","doi-asserted-by":"publisher","first-page":"789","DOI":"10.1016\/S0005-1098(99)00214-9","volume":"36","author":"D Mayne","year":"2000","unstructured":"Mayne, D., Rawlings, J., Rao, C., Scokaert, P.O.M.: Constrained model predictive control: stability and optimality. Automatica 36(6), 789\u2013814 (2000). https:\/\/doi.org\/10.1016\/S0005-1098(99)00214-9","journal-title":"Automatica"},{"key":"5_CR32","doi-asserted-by":"publisher","unstructured":"Olfati-Saber, R., Shamma, J.: Consensus filters for sensor networks and distributed sensor fusion. In: Proceedings of 44-th IEEE International Conference on Decision and Control, Seville, Spain, December 2005. https:\/\/doi.org\/10.1109\/CDC.2005.1583238","DOI":"10.1109\/CDC.2005.1583238"},{"key":"5_CR33","doi-asserted-by":"publisher","unstructured":"Olfati-Saber, R., Fax, J.A., Murray, R.M.: Consensus and cooperation in networked multi-agent systems. Proc. IEEE 95(1), (2007). https:\/\/doi.org\/10.1109\/JPROC.2006.887293","DOI":"10.1109\/JPROC.2006.887293"},{"issue":"11","key":"5_CR34","doi-asserted-by":"publisher","first-page":"2137","DOI":"10.1080\/00207170902948027","volume":"82","author":"W Ren","year":"2009","unstructured":"Ren, W.: Distributed leaderless consensus algorithms for networked Euler-Lagrange systems. Int. J. Control 82(11), 2137\u20132149 (2009). https:\/\/doi.org\/10.1080\/00207170902948027","journal-title":"Int. J. Control"},{"issue":"1","key":"5_CR35","doi-asserted-by":"publisher","first-page":"706","DOI":"10.2514\/1.9287","volume":"27","author":"W Ren","year":"2004","unstructured":"Ren, W., Beard, R.W.: Decentralized scheme for spacecraft formation flying via the virtual structure approach. J. Guid. Control Dyn. 27(1), 706\u2013716 (2004). https:\/\/doi.org\/10.2514\/1.9287","journal-title":"J. Guid. Control Dyn."},{"issue":"1","key":"5_CR36","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1109\/TIE.2013.2245612","volume":"61","author":"H Rezaee","year":"2014","unstructured":"Rezaee, H., Abdollahi, F.: A decentralized cooperative control scheme with obstacle avoidance for a team of mobile robots. IEEE Trans. Industr. Electron. 61(1), 347\u2013354 (2014). https:\/\/doi.org\/10.1109\/TIE.2013.2245612","journal-title":"IEEE Trans. Industr. Electron."},{"key":"5_CR37","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: Proceedings of the 31st International Conference on Machine Learning, Lille, France, pp. 1889\u20131897, June 2015"},{"key":"5_CR38","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"5_CR39","volume-title":"Reinforcement Learning: An Introductions","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introductions, 2nd edn. The MIT Press, Cambridge (2018)","edition":"2"},{"key":"5_CR40","doi-asserted-by":"publisher","unstructured":"V\u00e1s\u00e1rhelyi, G., Vir\u00e1gh, C., Somorjai, G., Nepusz, T., Eiben, A.E., Vicsek, T.: Optimized flocking of autonomous drones in confined environments. Sci. Robot. 3(20), (2018). https:\/\/doi.org\/10.1126\/scirobotics.aat3536","DOI":"10.1126\/scirobotics.aat3536"},{"issue":"5","key":"5_CR41","doi-asserted-by":"publisher","first-page":"1731","DOI":"10.1109\/TCST.2012.2218815","volume":"21","author":"J Wang","year":"2013","unstructured":"Wang, J., Xin, M.: Integrated optimal formation control of multiple unmanned aerial vehicles. IEEE Trans. Control Syst. Technol. 21(5), 1731\u20131744 (2013). https:\/\/doi.org\/10.1109\/TCST.2012.2218815","journal-title":"IEEE Trans. Control Syst. Technol."},{"key":"5_CR42","unstructured":"Yang, Y., Luo, R., Li, M., Zhou, M., Zhang, W., Wang, J.: Mean field multi-agent reinforcement learning. arXiv preprint arXiv:1802.05438 (2018)"},{"key":"5_CR43","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/j.ast.2018.05.029","volume":"79","author":"Q Zhang","year":"2018","unstructured":"Zhang, Q., Liu, H.H.T.: Aerodynamic model-based robust adaptive control for close formation flight. Aerosp. Sci. Technol. 79, 5\u201316 (2018). https:\/\/doi.org\/10.1016\/j.ast.2018.05.029","journal-title":"Aerosp. Sci. Technol."},{"issue":"11","key":"5_CR44","doi-asserted-by":"publisher","first-page":"8818","DOI":"10.1109\/TIE.2018.2811367","volume":"65","author":"Q Zhang","year":"2018","unstructured":"Zhang, Q., Liu, H.H.T.: UDE-based robust command filtered backstepping control for close formation flight. IEEE Trans. Industr. Electron. 65(11), 8818\u20138827 (2018). https:\/\/doi.org\/10.1109\/TIE.2018.2811367. Accessed 12 Mar 2018","journal-title":"IEEE Trans. Industr. Electron."},{"key":"5_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Pan, W., Reppa, V.: Model-reference reinforcement learning control of autonomous surface vehicles with uncertainties. arXiv preprint arXiv:2003.13839 (2020)","DOI":"10.1109\/CDC42340.2020.9304347"},{"key":"5_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Pan, W., Reppa, V.: Model-reference reinforcement learning for collision-free tracking control of autonomous surface vehicles. arXiv preprint arXiv:2008.07240 (2020)","DOI":"10.1109\/CDC42340.2020.9304347"},{"key":"5_CR47","doi-asserted-by":"publisher","unstructured":"Ziebart, B.D.: Modeling Purposeful Adaptive Behavior with the Principle of Maximum Causal Entropy, December 2010. https:\/\/doi.org\/10.1184\/R1\/6720692.v1. https:\/\/kilthub.cmu.edu\/articles\/Modeling_Purposeful_Adaptive_Behavior_with_the_Principle_of_Maximum_Causal_Entropy\/6720692","DOI":"10.1184\/R1\/6720692.v1"}],"container-title":["Lecture Notes in Computer Science","Distributed Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-64096-5_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,29]],"date-time":"2022-11-29T17:26:23Z","timestamp":1669742783000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-64096-5_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030640958","9783030640965"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-64096-5_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"25 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Distributed Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nanjing","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dai22020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.adai.ai\/dai\/2020\/2020.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.77","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the Corona pandemic this event was held virtually.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}