{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,5]],"date-time":"2026-04-05T05:13:35Z","timestamp":1775366015958,"version":"3.50.1"},"reference-count":89,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/100018693","name":"Horizon Europe CODECO Project","doi-asserted-by":"publisher","award":["101092696"],"award-info":[{"award-number":["101092696"]}],"id":[{"id":"10.13039\/100018693","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Horizon Europe NEMO Project","award":["101070118"],"award-info":[{"award-number":["101070118"]}]},{"name":"UNICO-5G I+D (B5GEMINI-AIUC) Project"},{"name":"Ministry of Economic Affairs and Digital Transformation of the Spanish Government and the NextGenerationEU","award":["TSI063000-2021-79"],"award-info":[{"award-number":["TSI063000-2021-79"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3472473","type":"journal-article","created":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T18:25:47Z","timestamp":1727893547000},"page":"146795-146806","source":"Crossref","is-referenced-by-count":46,"title":["Comparative Analysis of A3C and PPO Algorithms in Reinforcement Learning: A Survey on General Environments"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6832-4381","authenticated-orcid":false,"given":"Alberto del","family":"Rio","sequence":"first","affiliation":[{"name":"Signals, Systems and Radiocommunications Department, Escuela T&#x00E9;cnica Superior de Ingenieros de Telecomunicaci&#x00F3;n (ETSIT), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7382-4276","authenticated-orcid":false,"given":"David","family":"Jimenez","sequence":"additional","affiliation":[{"name":"Physical Electronics, Electrical Engineering and Applied Physics Department, Escuela T&#x00E9;cnica Superior de Ingenieros de Telecomunicaci&#x00F3;n (ETSIT), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2111-187X","authenticated-orcid":false,"given":"Javier","family":"Serrano","sequence":"additional","affiliation":[{"name":"Informatic Systems Department, Escuela T&#x00E9;cnica Superior de Ingenier&#x00ED;a de Sistemas Inform&#x00E1;ticos (ETSISI), Universidad Polit&#x00E9;cnica de Madrid, Madrid, Spain"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10827"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2021.3070879"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.13140\/RG.2.2.18893.74727"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/SNPD.2017.8022767"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s10614-021-10119-4"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1111\/mafi.12382"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SMC.2019.8914201"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/SMC42975.2020.9282951"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202134"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.3390\/app122010343"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.comnet.2024.110334"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-12178-7"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TBC.2021.3099728"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TSG.2018.2834219"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3078462"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.148"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/LARS\/SBR\/WRE59448.2023.10332918"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330668"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2020.106706"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00062"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196730"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8917306"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ECICE59523.2023.10383091"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICBASE53849.2021.00107"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3477600"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-020-08896-5"},{"key":"ref29","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ouyang"},{"key":"ref30","article-title":"Fine-tuning language models from human preferences","author":"Ziegler","year":"2020","journal-title":"arXiv:1909.08593"},{"key":"ref31","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Mnih"},{"key":"ref32","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv:1707.06347"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3424636.3426907"},{"key":"ref34","article-title":"Openai GYM","author":"Brockman","year":"2016","journal-title":"arXiv:1606.01540"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"ref36","article-title":"Playing Atari with deep reinforcement learning","author":"Mnih","year":"2013","journal-title":"arXiv:1312.5602"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2018.8500630"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2019.03.018"},{"key":"ref40","article-title":"Trust region policy optimization","author":"Schulman","year":"2017","journal-title":"arXiv:1502.05477"},{"key":"ref41","first-page":"12535","article-title":"On-policy deep reinforcement learning for the average-reward criterion","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","volume":"139","author":"Zhang"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3044196"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCB.2008.2007630"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TIV.2020.3012947"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CCWC.2019.8666545"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICACCI.2017.8125811"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2017.15"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3459991"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3640824.3640871"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CSITSS54238.2021.9683467"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.23919\/OCEANS44145.2021.9706000"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ISIEA58478.2023.10212317"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCS57501.2023.10151327"},{"key":"ref56","first-page":"440","article-title":"The influence of reward on the speed of reinforcement learning: An analysis of shaping","volume-title":"Proc. 20th Int. Conf. Mach. Learn.","author":"Laud"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2016.7799377"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/820"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17300"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3011351"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3028529"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1016\/j.arcontrol.2018.09.005"},{"key":"ref64","article-title":"Designing neural network architectures using reinforcement learning","author":"Baker","year":"2016","journal-title":"arXiv:1611.02167"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2019.06.005"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.03.003"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3264540"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.09.141"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2973169"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2007.11.040"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCBDA.2017.7951951"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207681"},{"key":"ref73","article-title":"Sample efficient actor-critic with experience replay","author":"Wang","year":"2017","journal-title":"arXiv:1611.01224"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/DSInS60115.2023.10455465"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/IAICT55358.2022.9887435"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3292075"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-022-00573-6"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/428"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/1082473.1082482"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87481-2_32"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3107375"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-021-05961-4"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2015.2419431"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CSCI58124.2022.00130"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482292"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2022.3184288"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2023.3343113"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1613\/jair.301"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10380310\/10703056.pdf?arnumber=10703056","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T17:32:40Z","timestamp":1729272760000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10703056\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":89,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3472473","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}