{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T16:38:41Z","timestamp":1725986321297},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319973036"},{"type":"electronic","value":"9783319973043"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-97304-3_15","type":"book-chapter","created":{"date-parts":[[2018,7,26]],"date-time":"2018-07-26T14:34:06Z","timestamp":1532615646000},"page":"191-203","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Decentralized Multiagent Reinforcement Learning for Efficient Robotic Control by Coordination Graphs"],"prefix":"10.1007","author":[{"given":"Chao","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongxu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiankang","family":"Ren","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongwei","family":"Ge","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liang","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,7,27]]},"reference":[{"key":"15_CR1","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. The MIT Press, Cambridge (1998)"},{"issue":"11","key":"15_CR2","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober, J., Peters, J.: Reinforcement learning in robotics: a survey. Int. J. Robot. Res. 32(11), 1238\u20131274 (2013)","journal-title":"Int. J. Robot. Res."},{"issue":"12","key":"15_CR3","doi-asserted-by":"publisher","first-page":"3083","DOI":"10.1109\/TNNLS.2015.2403394","volume":"26","author":"C Yu","year":"2015","unstructured":"Yu, C., Zhang, M., Ren, F., Tan, G.: Emotional multiagent reinforcement learning in spatial social dilemmas. IEEE Trans. Neural Netw. Learn. Syst. 26(12), 3083\u20133096 (2015)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"15_CR4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3","volume-title":"Reinforcement Learning. Adaptation, Learning, and Optimization","author":"M Wiering","year":"2012","unstructured":"Wiering, M., Van Otterlo, M.: Reinforcement Learning. Adaptation, Learning, and Optimization. Springer, Berlin (2012). \nhttps:\/\/doi.org\/10.1007\/978-3-642-27645-3"},{"issue":"2","key":"15_CR5","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., De Schutter, B.: A comprehensive survey of multiagent reinforcement learning. IEEE Trans. Syst. Man Cybern. Part C Appl. Rev. 38(2), 156\u2013172 (2008)","journal-title":"IEEE Trans. Syst. Man Cybern. Part C Appl. Rev."},{"key":"15_CR6","first-page":"1789","volume":"7","author":"JR Kok","year":"2006","unstructured":"Kok, J.R., Vlassis, N.: Collaborative multiagent reinforcement learning by payoff propagation. J. Mach. Learn. Res. 7, 1789\u20131828 (2006)","journal-title":"J. Mach. Learn. Res."},{"key":"15_CR7","unstructured":"Guestrin, C., Lagoudakis, M., Parr, R.: Coordinated reinforcement learning. In: ICML 2002, pp. 227\u2013234 (2002)"},{"issue":"2","key":"15_CR8","first-page":"71","volume":"2","author":"M Grana","year":"2011","unstructured":"Grana, M., Fernandez-Gauna, B., Lopez-Guede, J.M.: Cooperative multi-agent reinforcement learning for multi-component robotic systems: guidelines for future research. Paladyn 2(2), 71\u201381 (2011)","journal-title":"Paladyn"},{"issue":"3\u20134","key":"15_CR9","first-page":"41","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8(3\u20134), 41 (1992)","journal-title":"Mach. Learn."},{"key":"15_CR10","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"548","DOI":"10.1007\/978-3-540-32256-6_51","volume-title":"RoboCup 2004: Robot Soccer World Cup VIII","author":"Y Takahashi","year":"2005","unstructured":"Takahashi, Y., Edazawa, K., Asada, M.: Modular learning system and scheduling for behavior acquisition in multi-agent environment. In: Nardi, D., Riedmiller, M., Sammut, C., Santos-Victor, J. (eds.) RoboCup 2004. LNCS (LNAI), vol. 3276, pp. 548\u2013555. Springer, Heidelberg (2005). \nhttps:\/\/doi.org\/10.1007\/978-3-540-32256-6_51"},{"issue":"1\u20132","key":"15_CR11","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discret. Event Dyn. Syst. 13(1\u20132), 41\u201377 (2003)","journal-title":"Discret. Event Dyn. Syst."},{"key":"15_CR12","unstructured":"Duan, Y., Chen, X., Houthooft, R., Schulman, J., Abbeel, P.: Benchmarking deep reinforcement learning for continuous control, pp. 1329\u20131338 (2016)"},{"issue":"2","key":"15_CR13","first-page":"1889","volume":"2","author":"J Schulman","year":"2015","unstructured":"Schulman, J., Levine, S., Moritz, P., Jordan, M.I., Abbeel, P.: Trust region policy optimization. Comput. Sci. 2(2), 1889\u20131897 (2015)","journal-title":"Comput. Sci."},{"key":"15_CR14","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: ICML 2014, pp. 387\u2013395 (2014)"},{"key":"15_CR15","volume-title":"A Survey on Policy Search for Robotics","author":"MP Deisenroth","year":"2013","unstructured":"Deisenroth, M.P., Neumann, G., Peters, J.: A Survey on Policy Search for Robotics. Now Publishers Inc., Breda (2013)"},{"issue":"12","key":"15_CR16","doi-asserted-by":"publisher","first-page":"2342","DOI":"10.1109\/TCYB.2014.2307862","volume":"44","author":"C Yu","year":"2014","unstructured":"Yu, C., Zhang, M., Ren, F.: Collective learning for the emergence of social norms in networked multiagent systems. IEEE Trans. Cybern. 44(12), 2342\u20132355 (2014)","journal-title":"IEEE Trans. Cybern."},{"issue":"6","key":"15_CR17","first-page":"A187","volume":"8","author":"TP Lillicrap","year":"2015","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. Comput. Sci. 8(6), A187 (2015)","journal-title":"Comput. Sci."},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: ICRA 2017, pp. 3389\u20133396 (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"15_CR19","unstructured":"Gu, S., Lillicrap, T., Sutskever, I., Levine, S.: Continuous deep q-learning with model-based acceleration. In: ICML 2016, pp. 2829\u20132838 (2016)"},{"issue":"12","key":"15_CR20","doi-asserted-by":"publisher","first-page":"2853","DOI":"10.1109\/TCYB.2014.2387277","volume":"45","author":"C Yu","year":"2015","unstructured":"Yu, C., Zhang, M., Ren, F.: Multiagent learning of coordination in loosely coupled multiagent systems. IEEE Trans. Cybern. 45(12), 2853\u20132867 (2015)","journal-title":"IEEE Trans. Cybern."}],"container-title":["Lecture Notes in Computer Science","PRICAI 2018: Trends in Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-97304-3_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2018,7,26]],"date-time":"2018-07-26T14:45:01Z","timestamp":1532616301000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-97304-3_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319973036","9783319973043"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-97304-3_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}