{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T15:33:58Z","timestamp":1776785638094,"version":"3.51.2"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,6]],"date-time":"2023-06-06T00:00:00Z","timestamp":1686009600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,6]],"date-time":"2023-06-06T00:00:00Z","timestamp":1686009600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,6]]},"DOI":"10.1109\/meco58584.2023.10155066","type":"proceedings-article","created":{"date-parts":[[2023,6,26]],"date-time":"2023-06-26T18:06:51Z","timestamp":1687802811000},"page":"1-4","source":"Crossref","is-referenced-by-count":13,"title":["An overview of reinforcement learning techniques"],"prefix":"10.1109","author":[{"given":"Damjan","family":"Pecioski","sequence":"first","affiliation":[{"name":"Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Viktor","family":"Gavriloski","sequence":"additional","affiliation":[{"name":"Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Simona","family":"Domazetovska","sequence":"additional","affiliation":[{"name":"Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anastasija","family":"Ignjatovska","sequence":"additional","affiliation":[{"name":"Ss. Cyril and Methodius University in Skopje,Faculty of Mechanical Engineering-Skopje,Skopje,N. Macedonia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Dynamic programming, princeton, nj: Princeton univ","author":"bellman","year":"1957","journal-title":"BellmanDynamic Programming 1957"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-13000-0"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14435-6_7"},{"key":"ref14","author":"howard","year":"1960","journal-title":"Dynamic Programming and Markov Processes"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.5220\/0006393400170026"},{"key":"ref30","first-page":"1","article-title":"Heterogeneous multi-agent deep reinforcement learning for traffic lights control","author":"calvo","year":"2018","journal-title":"Proc 26th Irish Conf Artif Intell Cogn Sci"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2981434"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2941229"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAR49639.2020.9107997"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-57365-6_12-2"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2017.03.003"},{"key":"ref17","first-page":"322","article-title":"June. Friend-or-foe Q-learning in general-sum games","volume":"1","author":"littman","year":"2001","journal-title":"In ICML"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"4213","DOI":"10.1609\/aaai.v33i01.33014213","article-title":"July. Robust multi-agent reinforcement learning via minimax deep deterministic policy gradient","volume":"33","author":"li","year":"2019","journal-title":"In Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","article-title":"Stochastic games","volume":"39 10","author":"shapley","year":"1953","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"ref18","article-title":"Asymptotic convergence of deep multi-agent actor-critic algorithms","author":"redder","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref24","article-title":"Learning with opponent-learning awareness","author":"foerster","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.2977374"},{"key":"ref26","first-page":"5872","article-title":"Fully decentralized multi-agent reinforcement learning with networked agents","author":"zhang","year":"2018","journal-title":"In International Conference on Machine Learning"},{"key":"ref25","first-page":"5468","article-title":"Separating value functions across time-scales","author":"romoff","year":"2019","journal-title":"In International Conference on Machine Learning"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i13.17357"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.3390\/app112110227"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ISGTEurope.2019.8905628"},{"key":"ref8","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"698","DOI":"10.1177\/0278364920987859","article-title":"How to train your robot with deep reinforcement learning: lessons we have learned","volume":"40","author":"ibarz","year":"2021","journal-title":"The International Journal of Robotics Research"},{"key":"ref4","volume":"22447","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.1995.478953"}],"event":{"name":"2023 12th Mediterranean Conference on Embedded Computing (MECO)","location":"Budva, Montenegro","start":{"date-parts":[[2023,6,6]]},"end":{"date-parts":[[2023,6,10]]}},"container-title":["2023 12th Mediterranean Conference on Embedded Computing (MECO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10154895\/10154897\/10155066.pdf?arnumber=10155066","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,17]],"date-time":"2023-07-17T17:31:30Z","timestamp":1689615090000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10155066\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,6]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/meco58584.2023.10155066","relation":{},"subject":[],"published":{"date-parts":[[2023,6,6]]}}}