{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T17:51:42Z","timestamp":1740160302672,"version":"3.37.3"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2022,1,20]],"date-time":"2022-01-20T00:00:00Z","timestamp":1642636800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,20]],"date-time":"2022-01-20T00:00:00Z","timestamp":1642636800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906027","61906135"],"award-info":[{"award-number":["61906027","61906135"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010031","name":"Postdoctoral Research Foundation of China","doi-asserted-by":"publisher","award":["2019M661080"],"award-info":[{"award-number":["2019M661080"]}],"id":[{"id":"10.13039\/501100010031","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1007\/s13042-021-01497-0","type":"journal-article","created":{"date-parts":[[2022,1,20]],"date-time":"2022-01-20T00:04:11Z","timestamp":1642637051000},"page":"1927-1944","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["SCC-rFMQ: a multiagent reinforcement learning method in cooperative Markov games with continuous actions"],"prefix":"10.1007","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9157-6050","authenticated-orcid":false,"given":"Chengwei","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Zhuobing","family":"Han","sequence":"additional","affiliation":[]},{"given":"Bingfu","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Wanli","family":"Xue","sequence":"additional","affiliation":[]},{"given":"Jianye","family":"Hao","sequence":"additional","affiliation":[]},{"given":"Xiaohong","family":"Li","sequence":"additional","affiliation":[]},{"given":"Dou","family":"An","sequence":"additional","affiliation":[]},{"given":"Rong","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,20]]},"reference":[{"unstructured":"Chevalier-Boisvert M, Willems L, Pal S (2018) Minimalistic gridworld environment for openai gym. GitHub repository, GitHub. https:\/\/github.com\/maximecb\/gym-minigrid","key":"1497_CR1"},{"issue":"3","key":"1497_CR2","doi-asserted-by":"publisher","first-page":"1086","DOI":"10.1109\/TITS.2019.2901791","volume":"21","author":"T Chu","year":"2019","unstructured":"Chu T, Wang J, Codec\u00e0 L, Li Z (2019) Multi-agent deep reinforcement learning for large-scale traffic signal control. IEEE Trans Intell Transport Syst 21(3):1086\u20131095","journal-title":"IEEE Trans Intell Transport Syst"},{"doi-asserted-by":"crossref","unstructured":"Foerster JN, Farquhar G, Afouras T, Nardelli N, Whiteson S (2018) Counterfactual multi-agent policy gradients. In: Thirty-Second AAAI Conference on artificial intelligence, vol 32, no. 1. AAAI","key":"1497_CR3","DOI":"10.1609\/aaai.v32i1.11794"},{"unstructured":"Ganapathi Subramanian S, Poupart P, Taylor ME, Hegde N (2020) Multi type mean field reinforcement learning. In: Proceedings of the 19th International Conference on autonomous agents and multiagent systems. AAMAS, pp 411\u2013419","key":"1497_CR4"},{"key":"1497_CR5","volume-title":"Deep learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow I, Bengio Y, Courville A (2016) Deep learning. MIT Press"},{"unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on machine learning. PMLR, pp 1856\u20131865","key":"1497_CR6"},{"unstructured":"Hao X, Wang W, Hao J, Yang Y (2019) Independent generative adversarial self-imitation learning in cooperative multiagent systems. In: Proceedings of the 18th International Conference on autonomous agents and multiagent systems. AAMAS, pp 1315\u20131323","key":"1497_CR7"},{"unstructured":"Jong SD, Verbeeck K, Verbeeck K (2008) Artificial agents learning human fairness. In: Proceedings of the 7th International Joint Conference on autonomous agents and multiagent systems. AAAI, pp 863\u2013870","key":"1497_CR8"},{"issue":"3","key":"1497_CR9","doi-asserted-by":"publisher","first-page":"338","DOI":"10.1109\/5326.704563","volume":"28","author":"L Jouffe","year":"1998","unstructured":"Jouffe L (1998) Fuzzy inference system learning by reinforcement methods. Trans Syst Man Cybern Part C 28(3):338\u2013355","journal-title":"Trans Syst Man Cybern Part C"},{"unstructured":"Konda VR, Tsitsiklis JN (2003) Actor-critic algorithms. In: Advances in neural information processing systems, pp 1008\u20131014","key":"1497_CR10"},{"unstructured":"Lauer M, Riedmiller M (2000) An algorithm for distributed reinforcement learning in cooperative multi-agent systems. In: Proceedings of the Seventeenth International Conference on machine learning. Citeseer","key":"1497_CR11"},{"unstructured":"Lauer M, Riedmiller MA (2000) An algorithm for distributed reinforcement learning in cooperative multi-agent systems. In: Proceedings of the 17h International Conference on machine learning. ICML, pp 535\u2013542","key":"1497_CR12"},{"unstructured":"Lazaric A, Restelli M, Bonarini A (2007) Reinforcement learning in continuous action spaces through sequential Monte Carlo methods. In: Conference on neural information processing systems. NeurIPS, pp 833\u2013840","key":"1497_CR13"},{"key":"1497_CR14","doi-asserted-by":"publisher","first-page":"971","DOI":"10.1109\/TIFS.2019.2932911","volume":"15","author":"D Li","year":"2020","unstructured":"Li D, Yang Q, Yu W, An D, Zhang Y, Zhao W (2020) Towards differential privacy-based online double auction for smart grid. IEEE Trans Inf Forensics and Secur 15:971\u2013986. https:\/\/doi.org\/10.1109\/TIFS.2019.2932911","journal-title":"IEEE Trans Inf Forensics and Secur"},{"issue":"99","key":"1497_CR15","first-page":"1","volume":"51","author":"H Li","year":"2020","unstructured":"Li H, Wu Y, Chen M (2020) Adaptive fault-tolerant tracking control for discrete-time multiagent systems via reinforcement learning algorithm. IEEE Trans Cybern 51(99):1\u201312","journal-title":"IEEE Trans Cybern"},{"issue":"5","key":"1497_CR16","doi-asserted-by":"publisher","first-page":"2239","DOI":"10.1109\/TNNLS.2020.3003950","volume":"32","author":"H Liang","year":"2020","unstructured":"Liang H, Liu G, Zhang H, Huang T (2020) Neural-network-based event-triggered adaptive control of nonaffine nonlinear multiagent systems with dynamic uncertainties. IEEE Trans Neural Netw Learn Syst 32(5):2239\u20132250","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","key":"1497_CR17"},{"unstructured":"Lowe R, WU Y, Tamar A, Harb J, Pieter Abbeel O, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in neural information processing systems, vol 30. Curran Associates, Inc, pp 6379\u20136390","key":"1497_CR18"},{"doi-asserted-by":"crossref","unstructured":"Matignon L, Laurent GJ, Fort-Piat NL (2007) Hysteretic q-learning: an algorithm for decentralized reinforcement learning in cooperative multi-agent teams. In: IEEE\/RSJ International Conference on intelligent robots and systems IROS. IEEE, pp 64\u201369","key":"1497_CR19","DOI":"10.1109\/IROS.2007.4399095"},{"issue":"1","key":"1497_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1017\/S0269888912000057","volume":"27","author":"L Matignon","year":"2012","unstructured":"Matignon L, Laurent Gj, Le\u00a0fort piat N (2012) Review: Independent reinforcement learners in cooperative Markov games: a survey regarding coordination problems. Knowl Eng Rev 27(1):1\u201331","journal-title":"Knowl Eng Rev"},{"key":"1497_CR21","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1016\/j.chb.2015.06.007","volume":"52","author":"J Meng","year":"2015","unstructured":"Meng J, Williams D, Shen C (2015) Channels matter: multimodal connectedness, types of co-players and social capital for multiplayer online battle arena gamers. Comput Hum Behav 52:190\u2013199","journal-title":"Comput Hum Behav"},{"issue":"7540","key":"1497_CR22","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529","journal-title":"Nature"},{"unstructured":"Omidshafiei S, Pazis J, Amato C, How JP, Vian J (2017) Deep decentralized multi-task multi-agent reinforcement learning under partial observability. In: Proceedings of the 34th International Conference on machine learning-volume 70, pp 2681\u20132690. JMLR. org","key":"1497_CR23"},{"unstructured":"Palmer G, Savani R, Tuyls K (2019) Negative update intervals in deep multi-agent reinforcement learning. In: Proceedings of the 18th International Conference on autonomous agents and multiagent systems, pp 43\u201351. International Foundation for Autonomous Agents and Multiagent Systems","key":"1497_CR24"},{"unstructured":"Palmer G, Tuyls K, Bloembergen D, Savani R (2018) Lenient multi-agent deep reinforcement learning. In: Proceedings of the 17th International Conference on autonomous agents and multiagent systems, pp 443\u2013451. International Foundation for Autonomous Agents and Multiagent Systems","key":"1497_CR25"},{"key":"1497_CR26","doi-asserted-by":"publisher","DOI":"10.1109\/TFUZZ.2020.2999746","author":"Y Pan","year":"2020","unstructured":"Pan Y, Du P, Xue H, Lam HK (2020) Singularity-free fixed-time fuzzy control for robotic systems with user-defined performance. IEEE Trans Fuzzy Syst. https:\/\/doi.org\/10.1109\/TFUZZ.2020.2999746","journal-title":"IEEE Trans Fuzzy Syst"},{"doi-asserted-by":"crossref","unstructured":"Panait L, Sullivan K, Luke S (2006) Lenient learners in cooperative multiagent systems. In: Proceedings of the 5th International Joint Conference on autonomous agents and multiagent systems. AAMAS, pp 801\u2013803","key":"1497_CR27","DOI":"10.1145\/1160633.1160776"},{"issue":"4","key":"1497_CR28","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J Peters","year":"2008","unstructured":"Peters J, Schaal S (2008) 2008 special issue: reinforcement learning of motor skills with policy gradients. Neural Netw 21(4):682\u2013697","journal-title":"Neural Netw"},{"unstructured":"Rashid T, Samvelyan M, Witt CS, Farquhar G, Foerster J, Whiteson S (2018) Qmix: monotonic value function factorization for deep multi-agent reinforcement learning. In: International Conference on machine learning. PMLR, pp 4295\u20134304","key":"1497_CR29"},{"issue":"1","key":"1497_CR30","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","volume":"27","author":"M Riedmiller","year":"2009","unstructured":"Riedmiller M, Gabel T, Hafner R, Lange S (2009) Reinforcement learning for robot soccer. Auton Robots 27(1):55\u201373","journal-title":"Auton Robots"},{"key":"1497_CR31","volume-title":"Numerical analysis with algorithms and programming","author":"S Saha Ray","year":"2016","unstructured":"Saha Ray S (2016) Numerical analysis with algorithms and programming. CRC Press, Taylor & Francis Group, Boca Raton"},{"key":"1497_CR32","first-page":"1063","volume":"5","author":"B Sallans","year":"2004","unstructured":"Sallans B, Hinton GE (2004) Reinforcement learning with factored states and actions. J Mach Learn Res 5:1063\u20131088","journal-title":"J Mach Learn Res"},{"unstructured":"Schulman J, Levine S, Abbeel P, Jordan M, Moritz P (2015) Trust region policy optimization. In: International Conference on machine learning. PMLR, pp 1889\u20131897","key":"1497_CR33"},{"unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","key":"1497_CR34"},{"unstructured":"Son K, Kim D, Kang WJ, Hostallero DE, Yi Y (2019) Qtran: learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: International Conference on machine learning. PMLR, pp 5887\u20135896","key":"1497_CR35"},{"unstructured":"Sukhbaatar S, Fergus R et al (2016) Learning multiagent communication with backpropagation. In: Advances in neural information processing systems. NeurIPS, pp 2244\u20132252","key":"1497_CR36"},{"key":"1497_CR37","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT Press"},{"doi-asserted-by":"crossref","unstructured":"Sutton RS, Maei HR, Precup D, Bhatnagar S, Silver D, Wiewiora E (2009) Fast gradient-descent methods for temporal-difference learning with linear function approximation. In: Proceedings of the 26th Annual International Conference on Machine Learning. PMLR, pp 993\u20131000","key":"1497_CR38","DOI":"10.1145\/1553374.1553501"},{"unstructured":"Tang H, Houthooft R, Foote D, Stooke A, Chen X, Duan Y, Schulman J, De\u00a0Turck F, Abbeel P (2017) # exploration: A study of count-based exploration for deep reinforcement learning. In: 31st Conference on neural information processing systems (NIPS), vol.\u00a030, pp 1\u201318","key":"1497_CR39"},{"issue":"6","key":"1497_CR40","doi-asserted-by":"publisher","first-page":"711","DOI":"10.1109\/TSMCB.2002.1049606","volume":"32","author":"ML Thathachar","year":"2002","unstructured":"Thathachar ML, Sastry PS (2002) Varieties of learning automata: an overview. Syst Man Cybern Part B Cybern IEEE Trans 32(6):711\u2013722","journal-title":"Syst Man Cybern Part B Cybern IEEE Trans"},{"issue":"1","key":"1497_CR41","first-page":"2914","volume":"17","author":"E Wei","year":"2016","unstructured":"Wei E, Luke S (2016) Lenient learning in independent-learner stochastic cooperative games. J Mach Learn Res 17(1):2914\u20132955","journal-title":"J Mach Learn Res"},{"doi-asserted-by":"crossref","unstructured":"Wen C, Yao X, Wang Y, Tan X (2020) Smix ($$\\lambda$$): enhancing centralized value functions for cooperative multi-agent reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence. AAAI, pp 7301\u20137308","key":"1497_CR42","DOI":"10.1609\/aaai.v34i05.6223"},{"unstructured":"Yang Y, Rui L, Li M, Ming Z, Wang J (2018) Mean field multi-agent reinforcement learning. In: The 35th International Conference on machine learning. PMLR, pp 5571\u20135580","key":"1497_CR43"},{"unstructured":"Yu C, Velu A, Vinitsky E, Wang Y, Bayen A, Wu Y (2021) The surprising effectiveness of ppo in cooperative multi-agent games. arXiv preprint arXiv:2103.01955","key":"1497_CR44"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-021-01497-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-021-01497-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-021-01497-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,23]],"date-time":"2023-01-23T20:32:42Z","timestamp":1674505962000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-021-01497-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,20]]},"references-count":44,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2022,7]]}},"alternative-id":["1497"],"URL":"https:\/\/doi.org\/10.1007\/s13042-021-01497-0","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"type":"print","value":"1868-8071"},{"type":"electronic","value":"1868-808X"}],"subject":[],"published":{"date-parts":[[2022,1,20]]},"assertion":[{"value":"12 November 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 December 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}