{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T06:04:16Z","timestamp":1773641056456,"version":"3.50.1"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2019,11,6]],"date-time":"2019-11-06T00:00:00Z","timestamp":1572998400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,11,6]],"date-time":"2019-11-06T00:00:00Z","timestamp":1572998400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Intell Robot Syst"],"published-print":{"date-parts":[[2020,8]]},"DOI":"10.1007\/s10846-019-01106-x","type":"journal-article","created":{"date-parts":[[2019,11,6]],"date-time":"2019-11-06T11:02:56Z","timestamp":1573038176000},"page":"371-386","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":56,"title":["Multi-robot Target Encirclement Control with Collision Avoidance via Deep Reinforcement Learning"],"prefix":"10.1007","volume":"99","author":[{"given":"Junchong","family":"Ma","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huimin","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junhao","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiwen","family":"Zeng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiqiang","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,11,6]]},"reference":[{"issue":"3","key":"1106_CR1","doi-asserted-by":"publisher","first-page":"1244","DOI":"10.1016\/j.jnca.2006.04.011","volume":"30","author":"R Aguilar-Ponce","year":"2007","unstructured":"Aguilar-Ponce, R., Kumar, A., Tecpanecatl-Xihuitl, J.L., Bayoumi, M.: A network of sensor-based framework for automated visual surveillance. J. Netw. Comput. Appl. 30(3), 1244\u20131271 (2007)","journal-title":"J. Netw. Comput. Appl."},{"issue":"3","key":"1106_CR2","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1109\/TNN.2004.826221","volume":"15","author":"A Arleo","year":"2004","unstructured":"Arleo, A., Smeraldi, F., Gerstner, W.: Cognitive navigation based on nonuniform gabor space sampling, unsupervised growing networks, and reinforcement learning. IEEE Trans. Neural Netw. 15(3), 639\u2013652 (2004)","journal-title":"IEEE Trans. Neural Netw."},{"issue":"4","key":"1106_CR3","doi-asserted-by":"publisher","first-page":"741","DOI":"10.1080\/00207721.2013.795632","volume":"45","author":"AL Bustamante","year":"2014","unstructured":"Bustamante, A.L., Molina, J.M., Patricio, M.A.: A practical approach for active camera coordination based on a fusion-driven multi-agent system. Int. J. Syst. Sci. 45(4), 741\u2013755 (2014)","journal-title":"Int. J. Syst. Sci."},{"issue":"11","key":"1106_CR4","doi-asserted-by":"publisher","first-page":"704","DOI":"10.1016\/j.sysconle.2010.08.006","volume":"59","author":"F Chen","year":"2010","unstructured":"Chen, F., Ren, W., Cao, Y.: Surrounding control in cooperative agent networks. Syst. Control Lett. 59 (11), 704\u2013712 (2010)","journal-title":"Syst. Control Lett."},{"key":"1106_CR5","unstructured":"Degris, T., White, M., Sutton, R.S.: Off-policy actor-critic. In 29th International Conference on Machine Learning (2012)"},{"issue":"6","key":"1106_CR6","first-page":"1","volume":"41","author":"A Farinelli","year":"2016","unstructured":"Farinelli, A., Iocchi, L., Nardi, D.: Distributed on-line dynamic task assignment for multi-robot patrolling. Auton. Robot. 41(6), 1\u201325 (2016)","journal-title":"Auton. Robot."},{"issue":"5","key":"1106_CR7","doi-asserted-by":"publisher","first-page":"789","DOI":"10.1109\/TCST.2006.876902","volume":"14","author":"J Finke","year":"2006","unstructured":"Finke, J., Passino, K.M., Sparks, A.G.: Stable task load balancing strategies for cooperative control of networked autonomous air vehicles. IEEE Trans. Control Syst. Technol. 14(5), 789\u2013803 (2006)","journal-title":"IEEE Trans. Control Syst. Technol."},{"key":"1106_CR8","unstructured":"Foerster, J., Assael, I.A., de Freitas, N., Whiteson, S.: Learning to Communicate with Deep Multi-Agent Reinforcement Learning. In: Advances in Neural Information Processing Systems, pp. 2137\u20132145 (2016)"},{"key":"1106_CR9","doi-asserted-by":"crossref","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. arXiv: 1705.08926 (2017)","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"1106_CR10","unstructured":"Foerster, J., Nardelli, N., Farquhar, G., Afouras, T., Torr, P.H., Kohli, P., Whiteson, S.: Stabilising experience replay for deep multi-agent reinforcement learning. arXiv: 1702.08887 (2017)"},{"issue":"2","key":"1106_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10514-015-9450-3","volume":"40","author":"A Franchi","year":"2016","unstructured":"Franchi, A., Stegagno, P., Oriolo, G.: Decentralized multi-robot encirclement of a 3d target with guaranteed collision avoidance. Auton. Robot. 40(2), 1\u201321 (2016)","journal-title":"Auton. Robot."},{"issue":"3","key":"1106_CR12","doi-asserted-by":"publisher","first-page":"1241","DOI":"10.3182\/20140824-6-ZA-1003.00890","volume":"47","author":"AT Hafez","year":"2014","unstructured":"Hafez, A.T., Iskandarani, M., Givigi, S.N., Yousefi, S., Beaulieu, A.: Uavs in formation and dynamic encirclement via model predictive control. IFAC Proc. 47(3), 1241\u20131246 (2014)","journal-title":"IFAC Proc."},{"key":"1106_CR13","doi-asserted-by":"crossref","unstructured":"Hausman, K., Mueller, J., Hariharan, A.: Cooperative multi-robot control for target tracking with onboard sensing. Int. J. Robot. Res. 34, (2015)","DOI":"10.1177\/0278364915602321"},{"key":"1106_CR14","unstructured":"He, D., Xia, Y., Qin, T., Wang, L., Yu, N., Liu, T., Ma, W.Y.: Dual Learning for Machine Translation. In: Advances in Neural Information Processing Systems, pp. 820\u2013828 (2016)"},{"key":"1106_CR15","unstructured":"He, H., Boyd-Graber, J., Kwok, K., Daum\u00e9, H., III: Opponent Modeling in Deep Reinforcement Learning. In: International Conference on Machine Learning, pp. 1804\u20131813 (2016)"},{"key":"1106_CR16","unstructured":"Iida, S., Kanoh, M., Kato, S., Itoh, H.: Reinforcement Learning for Motion Control of Humanoid Robots. In: Ieee\/Rsj International Conference on Intelligent Robots and Systems, vol.4, pp. 3153\u20133157 (2004)"},{"issue":"10","key":"1106_CR17","doi-asserted-by":"publisher","first-page":"2040","DOI":"10.1080\/00207179.2010.504784","volume":"83","author":"T Kim","year":"2010","unstructured":"Kim, T., Hara, S., Hori, Y.: Cooperative control of multi-agent dynamical systems in target-enclosing operations using cyclic pursuit strategy. Int. J. Control. 83(10), 2040\u20132052 (2010)","journal-title":"Int. J. Control."},{"issue":"7","key":"1106_CR18","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1016\/j.sysconle.2010.04.003","volume":"59","author":"Y Lan","year":"2010","unstructured":"Lan, Y., Yan, G., Lin, Z.: Distributed control of cooperative target enclosing based on reachability and invariance analysis. Syst. Control Lett. 59(7), 381\u2013389 (2010)","journal-title":"Syst. Control Lett."},{"key":"1106_CR19","unstructured":"Leibo, J.Z., Zambaldi, V., Lanctot, M., Marecki, J., Graepel, T.: Multi-agent reinforcement learning in sequential social dilemmas. In: Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems, pp. 464\u2013473. International Foundation for Autonomous Agents and Multiagent Systems (2017)"},{"key":"1106_CR20","unstructured":"Lillicrap, T.P., Hunt, J.J., Pritzel, A., Heess, N., Erez, T., Tassa, Y., Silver, D., Wierstra, D.: Continuous control with deep reinforcement learning. arXiv: 1509.02971 (2015)"},{"key":"1106_CR21","doi-asserted-by":"crossref","unstructured":"Liu, L., Luo, C., Shen, F.: Multi-Agent Formation Control with Target Tracking and Navigation. In: IEEE International Conference on Information and Automation (2017)","DOI":"10.1109\/ICInfA.2017.8078889"},{"key":"1106_CR22","doi-asserted-by":"crossref","unstructured":"Long, P., Fanl, T., Liao, X., Liu, W., Zhang, H., Pan, J.: Towards Optimally Decentralized Multi-Robot Collision Avoidance via Deep Reinforcement Learning. In: 2018 IEEE International Conference on Robotics and Automation (ICRA), pp. 6252\u20136259. IEEE (2018)","DOI":"10.1109\/ICRA.2018.8461113"},{"key":"1106_CR23","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, O.P., Mordatch, I.: Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments. In: Advances in Neural Information Processing Systems, pp. 6379\u20136390 (2017)"},{"issue":"9","key":"1106_CR24","doi-asserted-by":"publisher","first-page":"1784","DOI":"10.1109\/TCYB.2014.2360368","volume":"45","author":"A Macwan","year":"2015","unstructured":"Macwan, A., Vilela, J., Nejat, G., Benhabib, B.: A multirobot path-planning strategy for autonomous wilderness search and rescue. IEEE Trans. Cybern. 45(9), 1784\u20131797 (2015)","journal-title":"IEEE Trans. Cybern."},{"issue":"7540","key":"1106_CR25","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"},{"key":"1106_CR26","unstructured":"Omidshafiei, S., Pazis, J., Amato, C., How, J.P., Vian, J.: Deep decentralized multi-task multi-agent reinforcement learning under partial observability. arXiv: 1703.06182 (2017)"},{"issue":"3","key":"1106_CR27","doi-asserted-by":"publisher","first-page":"296","DOI":"10.2307\/1543482","volume":"202","author":"JK Parrish","year":"2002","unstructured":"Parrish, J.K., Viscido, S.V., Grunbaum, D.: Self-organized fish schools: an examination of emergent properties. Biol. Bullet. 202(3), 296\u2013305 (2002)","journal-title":"Biol. Bullet."},{"key":"1106_CR28","doi-asserted-by":"crossref","unstructured":"Sarwal, A., Agrawal, D., Chaudhary, S.: Surveillance in an Open Environment by Co-Operative Tracking Amongst Sensor Enabled Robots. In: 2007. ICIA\u201907. International Conference On Information Acquisition, pp. 345\u2013349. IEEE (2007)","DOI":"10.1109\/ICIA.2007.4295756"},{"key":"1106_CR29","doi-asserted-by":"crossref","unstructured":"Sato, K., Maeda, N.: Target-Enclosing Strategies for Multi-Agent Using Adaptive Control Strategy. In: IEEE International Conference on Control Applications, pp. 1761\u20131766 (2010)","DOI":"10.1109\/CCA.2010.5611117"},{"key":"1106_CR30","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1080\/00207721.2016.1144226","volume":"48","author":"YJ Shi","year":"2017","unstructured":"Shi, Y.J., Li, R., Teo, K.L.: Rotary enclosing control of second-order multi-agent systems for a group of targets. Int. J. Syst. Sci. 48, 13\u201321 (2017)","journal-title":"Int. J. Syst. Sci."},{"issue":"7587","key":"1106_CR31","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., Huang, A., Maddison, C.J., Guez, A., Sifre, L., Van Den Driessche, G., Schrittwieser, J., Antonoglou, I., Panneershelvam, V., Lanctot, M., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484 (2016)","journal-title":"Nature"},{"key":"1106_CR32","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic Policy Gradient Algorithms. Proceedings of the 31st International Conference on Machine Learning (ICML-14), pp. 387\u2013395 (2014)"},{"issue":"7676","key":"1106_CR33","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., Schrittwieser, J., Simonyan, K., Antonoglou, I., Huang, A., Guez, A., Hubert, T., Baker, L., Lai, M., Bolton, A.: Mastering the game of go without human knowledge. Nature 550(7676), 354\u2013359 (2017)","journal-title":"Nature"},{"key":"1106_CR34","doi-asserted-by":"crossref","unstructured":"Su, P.H., Gasic, M., Mrksic, N., Rojas-Barahona, L., Ultes, S., Vandyke, D., Wen, T.H., Young, S.: On-line active reward learning for policy optimisation in spoken dialogue systems. arXiv: 1605.07669 (2016)","DOI":"10.18653\/v1\/P16-1230"},{"key":"1106_CR35","unstructured":"Sukhbaatar, S., Fergus, R., et al.: Learning Multiagent Communication with Backpropagation. In: Advances in Neural Information Processing Systems, pp. 2244\u20132252 (2016)"},{"key":"1106_CR36","doi-asserted-by":"crossref","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning:An introduction. MIT Press, Cambridge (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"1106_CR37","doi-asserted-by":"crossref","unstructured":"Tampuu, A., Matiisen, T., Kodelja, D., Kuzovkin, I., Korjus, K., Aru, J., Aru, J., Vicente, R.: Multiagent cooperation and competition with deep reinforcement learning, vol. 12 (2017)","DOI":"10.1371\/journal.pone.0172395"},{"issue":"12","key":"1106_CR38","doi-asserted-by":"publisher","first-page":"3248","DOI":"10.1109\/TAC.2013.2263653","volume":"58","author":"C Wang","year":"2013","unstructured":"Wang, C., Xie, G., Cao, M.: Forming circle formations of anonymous mobile agents with order preservation. IEEE Trans. Autom. Control 58(12), 3248\u20133254 (2013)","journal-title":"IEEE Trans. Autom. Control"},{"issue":"4","key":"1106_CR39","doi-asserted-by":"publisher","first-page":"1100","DOI":"10.1016\/j.automatica.2014.02.036","volume":"50","author":"C Wang","year":"2014","unstructured":"Wang, C., Xie, G., Cao, M.: Controlling anonymous mobile agents with unidirectional locomotion to form formations on a circle. Automatica 50(4), 1100\u20131108 (2014)","journal-title":"Automatica"},{"key":"1106_CR40","unstructured":"Wang, Z., Schaul, T., Hessel, M., Van Hasselt, H., Lanctot, M., De Freitas, N.: Dueling network architectures for deep reinforcement learning. arXiv: 1511.06581 (2015)"},{"key":"1106_CR41","doi-asserted-by":"crossref","unstructured":"Xiao, J., Xiong, D., Yao, W., Yu, Q., Lu, H., Zheng, Z.: Building Software System and Simulation Environment for RoboCup MSL Soccer Robots Based on ROS and Gazebo. Springer International Publishing (2017)","DOI":"10.1007\/978-3-319-54927-9_18"},{"key":"1106_CR42","doi-asserted-by":"publisher","first-page":"883","DOI":"10.1007\/s10846-018-0906-5","volume":"94","author":"W Yao","year":"2019","unstructured":"Yao, W., Lu, H., Zeng, Z., Xiao, J., Zheng, Z.: Distributed static and dynamic circumnavigation control with arbitrary spacings for a heterogeneous multi-robot system. Journal of Intelligent & Robotic Systems 94, 883\u2013905 (2019)","journal-title":"Journal of Intelligent & Robotic Systems"},{"key":"1106_CR43","unstructured":"Zhang, Y., Parker, L.E.: Multi-Robot Task Scheduling. In: IEEE International Conference on Robotics and Automation, pp. 2992\u20132998 (2016)"},{"key":"1106_CR44","unstructured":"Zheng, Y., Luo, S., Lv, Z.: Control Double Inverted Pendulum by Reinforcement Learning with Double Cmac Network. In: International Conference on Pattern Recognition, pp. 639\u2013642 (2006)"}],"container-title":["Journal of Intelligent &amp; Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-019-01106-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10846-019-01106-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-019-01106-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,3]],"date-time":"2022-10-03T19:45:51Z","timestamp":1664826351000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10846-019-01106-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,6]]},"references-count":44,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2020,8]]}},"alternative-id":["1106"],"URL":"https:\/\/doi.org\/10.1007\/s10846-019-01106-x","relation":{},"ISSN":["0921-0296","1573-0409"],"issn-type":[{"value":"0921-0296","type":"print"},{"value":"1573-0409","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,11,6]]},"assertion":[{"value":"21 January 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 September 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}