{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T08:57:46Z","timestamp":1773392266967,"version":"3.50.1"},"reference-count":46,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100002322","name":"Brazilian Coordination for the Improvement of Higher Education Personnel","doi-asserted-by":"publisher","award":["88882.385785\/2019-01"],"award-info":[{"award-number":["88882.385785\/2019-01"]}],"id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/access.2022.3189021","type":"journal-article","created":{"date-parts":[[2022,7,7]],"date-time":"2022-07-07T19:25:40Z","timestamp":1657221940000},"page":"72628-72642","source":"Crossref","is-referenced-by-count":19,"title":["Multiagent Reinforcement Learning for Strategic Decision Making and Control in Robotic Soccer Through Self-Play"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6511-8825","authenticated-orcid":false,"given":"Bruno","family":"Brandao","sequence":"first","affiliation":[{"name":"Deep Learning Brazil, Federal University of Goi&#x00E1;s (UFG), Goi&#x00E2;nia, Goi&#x00E1;s, Brazil"}]},{"given":"Telma Woerle","family":"De Lima","sequence":"additional","affiliation":[{"name":"Deep Learning Brazil, Federal University of Goi&#x00E1;s (UFG), Goi&#x00E2;nia, Goi&#x00E1;s, Brazil"}]},{"given":"Anderson","family":"Soares","sequence":"additional","affiliation":[{"name":"Deep Learning Brazil, Federal University of Goi&#x00E1;s (UFG), Goi&#x00E2;nia, Goi&#x00E1;s, Brazil"}]},{"given":"Luckeciano","family":"Melo","sequence":"additional","affiliation":[{"name":"Deep Learning Brazil, Federal University of Goi&#x00E1;s (UFG), Goi&#x00E2;nia, Goi&#x00E1;s, Brazil"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2944-4476","authenticated-orcid":false,"given":"Marcos R. O. A.","family":"Maximo","sequence":"additional","affiliation":[{"name":"Autonomous Computational Systems Laboratory (LAB-SCA), Computer Science Division, Aeronautics Institute of Technology, S&#x00E3;o Jos&#x00E9; dos Campos, S&#x00E3;o Paulo, Brazil"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref2","article-title":"Playing atari with deep reinforcement learning","author":"Mnih","year":"2013"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref4","volume-title":"OpenAI Five","year":"2018"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2018.2823329"},{"key":"ref6","article-title":"Emergence of locomotion behaviours in rich environments","author":"Heess","year":"2017","journal-title":"arXiv:1707.02286"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2017.2720851"},{"key":"ref8","article-title":"Learning dexterous in-hand manipulation","author":"Andrychowicz","year":"2018","journal-title":"arXiv:1808.00177"},{"key":"ref9","first-page":"1","article-title":"Emergent tool use from multi-agent autocurricula","volume-title":"Proc. Int. Conf. Learn. 
Represent.","author":"Baker"},{"key":"ref10","first-page":"1","article-title":"Emergent coordination through competition","volume-title":"Proc. 7th Int. Conf. Learn. Represent. (ICLR)","author":"Liu"},{"key":"ref11","first-page":"1","article-title":"Monotonic value function factorisation for deep multi-agent reinforcement learning","volume":"21","author":"Rashid","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref12","first-page":"7991","article-title":"Emergent social learning via multi-agent reinforcement learning","volume-title":"Proc. 38th Int. Conf. Mach. Learn. (ICML)","volume":"139","author":"Ndousse"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2020.3034827"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TWC.2019.2935201"},{"key":"ref15","volume-title":"Rules for the IEEE Very Small Competition","year":"2021"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.52591\/lxai201912088"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2808266"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCKE.2017.8167920"},{"key":"ref19","volume-title":"Pequi Mec\u00e2nico\u2014IEEE VSS Soccer Team\/CBR 2019","author":"Martins","year":"2019"},{"key":"ref20","author":"Oliveira","year":"2018","journal-title":"Pequi Mec\u00e2nico INF\u2014IEEE VSS Soccer Team\u2014CBR 2018"},{"key":"ref21","author":"de Oliveira Lima J\u00faniorr","year":"2019","journal-title":"Robocin ia Description Paper"},{"key":"ref22","article-title":"Curriculum-based deep reinforcement learning applied to humanoid robots","author":"Muzio","year":"2018"},{"key":"ref23","article-title":"A deep reinforcement learning method for humanoid kick motion","author":"Melo","year":"2018"},{"key":"ref24","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2013.6614996"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref28","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","volume":"48","author":"Mnih"},{"key":"ref29","first-page":"834","article-title":"Improving stochastic policy gradients in continuous control with deep reinforcement learning using the beta distribution","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","volume":"70","author":"Chou"},{"key":"ref30","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","volume":"37","author":"Schulman"},{"key":"ref31","first-page":"1","article-title":"High-dimensional continuous control using generalized advantage estimation","volume-title":"Proc. 4th Int. Conf. Learn. Represent. 
(ICLR)","author":"Schulman"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CCDC.2015.7162694"},{"key":"ref33","volume-title":"Rules for the Small Size Soccer League","year":"2020"},{"key":"ref34","volume-title":"Rules for the Humanoid League","year":"2018"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCIAutom.2016.7483162"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CCDC.2017.7979360"},{"key":"ref37","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"arXiv:1509.02971"},{"key":"ref38","article-title":"FACMAC: Factored multi-agent centralised policy gradients","volume-title":"Advances in Neural Information Processing Systems","author":"Peng","year":"2021"},{"key":"ref39","first-page":"1","article-title":"Curriculum learning for reinforcement learning domains: A framework and survey","volume":"21","author":"Narvekar","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref40","first-page":"92","article-title":"Coevolution of a backgammon player","volume-title":"Proc. 5th Int. Workshop Synth. Simul. Living Syst.","author":"Jordan Pollack"},{"key":"ref41","first-page":"1","article-title":"Emergent complexity via multi-agent competition","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Bansal"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref43","article-title":"Fus\u00e3o de sensores para rob\u00f4 da categoria vsss","author":"Oliveira","year":"2018"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1214\/ss\/1177013815"},{"key":"ref46","article-title":"Exponentially weighted imitation learning for batched historical data","volume-title":"Advances in Neural Information Processing Systems","volume":"31","author":"Wang","year":"2018"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9668973\/09817118.pdf?arnumber=9817118","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T05:06:32Z","timestamp":1706763992000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9817118\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/access.2022.3189021","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}