{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,12]],"date-time":"2025-01-12T17:40:06Z","timestamp":1736703606871,"version":"3.32.0"},"reference-count":23,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2006,12,20]],"date-time":"2006-12-20T00:00:00Z","timestamp":1166572800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Intell Robot Syst"],"published-print":{"date-parts":[[2007,1,4]]},"DOI":"10.1007\/s10846-006-9103-z","type":"journal-article","created":{"date-parts":[[2006,12,19]],"date-time":"2006-12-19T14:13:13Z","timestamp":1166537593000},"page":"7-22","source":"Crossref","is-referenced-by-count":16,"title":["Fuzzy Policy Reinforcement Learning in Cooperative Multi-robot Systems"],"prefix":"10.1007","volume":"48","author":[{"given":"Dongbing","family":"Gu","sequence":"first","affiliation":[]},{"given":"Erfu","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2006,12,20]]},"reference":[{"key":"9103_CR1","volume-title":"Advances in Neural Information System, vol.11","author":"L.C. Baird","year":"1995","unstructured":"Baird, L.C., Moore, A.W.: Gradient descent for general reinforcement learning. In: Advances in Neural Information System, vol.11, MIT, Cambridge, MA (1995)"},{"issue":"5","key":"9103_CR2","first-page":"834","volume":"13","author":"A.G. Barto","year":"1983","unstructured":"Barto, A.G., Sutton, R.S., Anderson, C.W.: Neuronlike adaptive elements that can solve difficult learning control problems. IEEE Trans. SMC 13(5), 834\u2013846 (1983)","journal-title":"IEEE Trans. SMC"},{"key":"9103_CR3","doi-asserted-by":"crossref","first-page":"319","DOI":"10.1613\/jair.806","volume":"15","author":"J. Baxter","year":"2001","unstructured":"Baxter, J., Bartlett, P.L.: Infinite-horizon policy-gradient estimation. J. Artif. Intell. Res. 15, 319\u2013350 (2001)","journal-title":"J. Artif. Intell. Res."},{"issue":"5","key":"9103_CR4","doi-asserted-by":"crossref","first-page":"724","DOI":"10.1109\/72.159061","volume":"3","author":"H.R. Berenji","year":"1992","unstructured":"Berenji, H.R., Khedkar, P.: Learning and tuning fuzzy logic controllers through reinforcements. IEEE Trans. Neural Netw. 3(5), 724\u2013740 (1992)","journal-title":"IEEE Trans. Neural Netw."},{"issue":"4","key":"9103_CR5","doi-asserted-by":"crossref","first-page":"478","DOI":"10.1109\/TFUZZ.2003.814834","volume":"11","author":"H.R. Berenji","year":"2003","unstructured":"Berenji, H.R., Vengerov, D.: A convergent actor critic based fuzzy reinforcement learning algorithm with application to power management of wireless transmitters. IEEE Trans. Fuzzy Systems. 11(4), 478\u2013485 (2003)","journal-title":"IEEE Trans. Fuzzy Systems."},{"key":"9103_CR6","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1016\/S0004-3702(02)00121-2","volume":"136","author":"M. Bowling","year":"2002","unstructured":"Bowling, M., Veloso, M.: Multiagent learning using a variable learning rate. Artif. Intell. 136, 215\u2013250 (2002)","journal-title":"Artif. Intell."},{"key":"9103_CR7","doi-asserted-by":"crossref","unstructured":"Grudic, G.Z., Kumar, V., Ungar, L.: Using policy gradient reinforcement learning on autonomous robot controllers. In: Proceedings of IEEE-RSJ International Conference on Intelligent Robots and Systems(IROS), Las Vegas, Nevada, pp. 406\u2013411 (2003)","DOI":"10.1109\/IROS.2003.1250662"},{"key":"9103_CR8","first-page":"1039","volume":"4","author":"J. Hu","year":"2003","unstructured":"Hu, J., Wellman, M.P.: Nash Q-learning for general-sum stochastic games. J. Mach. Learn. Res. 4, 1039\u20131069 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"9103_CR9","unstructured":"Kimura, H., Yamamura, M., Kobayashi, S.: Reinformcenent leanring by stochastic hill climbing on discounted reward. In: Proceedings of the 12th International Conference Machine Learning, pp. 152\u2013160 California (1995)"},{"key":"9103_CR10","doi-asserted-by":"crossref","unstructured":"Kohl, N., Stone, P.: Policy gradient reinformcenent leanring for fast quadrupedal locomotion. In: Proceedings of the IEEE International Conference on Robotics and Automation(ICRA), pp. 2619\u20132624 New Orleans, LA (2004)","DOI":"10.1109\/ROBOT.2004.1307456"},{"issue":"4","key":"9103_CR11","doi-asserted-by":"crossref","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V.R. Konda","year":"2003","unstructured":"Konda, V.R., Tsitsiklis, J.N.: Actor-critic algorithms. SIAM J. Control Optim. 42(4), 1143\u20131166 (2003)","journal-title":"SIAM J. Control Optim."},{"key":"9103_CR12","doi-asserted-by":"crossref","unstructured":"Littman, M.L.: Markov games as a framework for multiagent reinforcement learning. In: Proceedings of the 11th International Conference on Machine Learning, pp.157\u2013163 (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"issue":"1","key":"9103_CR13","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1016\/S1389-0417(01)00015-8","volume":"2","author":"M.L. Littman","year":"2000","unstructured":"Littman, M.L.: Value-function reinforcement learning in Markov games. Cogn. Syst. Res. 2(1), 55\u201366 (2000)","journal-title":"Cogn. Syst. Res."},{"issue":"6","key":"9103_CR14","first-page":"933","volume":"19","author":"R. Olfati-Saber","year":"2006","unstructured":"Olfati-Saber, R.: Flcoking for multi-agent dynamic systems: Algorithms and theory. IEEE Trans. Automat. Contr. 19(6), 933\u2013941 (2006)","journal-title":"IEEE Trans. Automat. Contr."},{"key":"9103_CR15","unstructured":"Peshkin, L., Kim, K., Meuleau, N., Kaelblingn, L.P.: Learning to cooperate via policy search. In: Proceedings of the 6th International Conference on uncertainty in artificial intelligence, pp. 307\u2013314 (2000)"},{"issue":"4","key":"9103_CR16","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1145\/37402.37406","volume":"21","author":"C.W. Reynolds","year":"1987","unstructured":"Reynolds, C.W.: Flocks, herds, and schools: A distributed behavioural model. Comput. Graph. 21(4), 25\u201334 (1987)","journal-title":"Comput. Graph."},{"key":"9103_CR17","first-page":"541","volume-title":"Proceedings of the 16th Annual Conference on Uncertainty in Artificial Intelligence (UAI)","author":"S. Singh","year":"2000","unstructured":"Singh, S., Kearns, M., Mansour, Y.: Nash convergence of gradient dynamics in general-sum games. In: Proceedings of the 16th Annual Conference on Uncertainty in Artificial Intelligence (UAI), pp. 541\u2013548 Stanford University, Stanford, CA (2000)"},{"key":"9103_CR18","first-page":"1057","volume":"12","author":"R.S. Sutton","year":"2000","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. Adv. Neural Inf. Process. syst. 12, 1057\u20131063 (2000) (MIT)","journal-title":"Adv. Neural Inf. Process. syst."},{"key":"9103_CR19","doi-asserted-by":"crossref","unstructured":"Tanner, H.G., Jadbabaie, A., Pappas, G.J.: Flocking in fixed and switching networks. IEEE Trans. Automat. Contr. (to appear)","DOI":"10.1109\/TAC.2007.895948"},{"key":"9103_CR20","unstructured":"Tao, N., Baxter, J., Weaver, L.: A multi-agent policy-gradient approach to network routing. In: Proceedings of 18th International Conference on Machine Learning, Williamstown MA, pp. 553\u2013560, July 2001"},{"key":"9103_CR21","doi-asserted-by":"crossref","unstructured":"Tedrake, R., Zhang, T., Seung, H.: Stochastic policy gradient reinforcement learning on a simple 3D biped. In: Proceedings of IEEE-RSJ International Conference on Intelligent Robots and Systems(IROS), Senda Japan, pp. 2849\u20132854, October 2004","DOI":"10.1109\/IROS.2004.1389841"},{"key":"9103_CR22","first-page":"229","volume":"8","author":"R.J. William","year":"1992","unstructured":"William, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8, 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"key":"9103_CR23","unstructured":"Yang, E., Gu, D., Hu, H.: Nonsingular formation control of cooperative mobile robots via feedback linearization. In: Proceedings of IEEE-RSJ International Conference on Intelligent Robots and Systems(IROS), Edmonton, Canada, pp. 3652\u20133657, August 2005"}],"container-title":["Journal of Intelligent and Robotic Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-006-9103-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10846-006-9103-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10846-006-9103-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,12]],"date-time":"2025-01-12T17:15:13Z","timestamp":1736702113000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10846-006-9103-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2006,12,20]]},"references-count":23,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2007,1,4]]}},"alternative-id":["9103"],"URL":"https:\/\/doi.org\/10.1007\/s10846-006-9103-z","relation":{},"ISSN":["0921-0296","1573-0409"],"issn-type":[{"type":"print","value":"0921-0296"},{"type":"electronic","value":"1573-0409"}],"subject":[],"published":{"date-parts":[[2006,12,20]]}}}