{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T18:39:55Z","timestamp":1772822395714,"version":"3.50.1"},"reference-count":41,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100005073","name":"Agency for Defense Development","doi-asserted-by":"publisher","award":["UD170043JD"],"award-info":[{"award-number":["UD170043JD"]}],"id":[{"id":"10.13039\/501100005073","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/access.2020.3046284","type":"journal-article","created":{"date-parts":[[2020,12,21]],"date-time":"2020-12-21T22:20:36Z","timestamp":1608589236000},"page":"226724-226736","source":"Crossref","is-referenced-by-count":32,"title":["Autonomous Control of Combat Unmanned Aerial Vehicles to Evade Surface-to-Air Missiles Using Deep Reinforcement Learning"],"prefix":"10.1109","volume":"8","author":[{"given":"Gyeong Taek","family":"Lee","sequence":"first","affiliation":[{"name":"Department of Industrial Engineering, Yonsei University, Seoul, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6936-5409","authenticated-orcid":false,"given":"Chang Ouk","family":"Kim","sequence":"additional","affiliation":[{"name":"Department of Industrial Engineering, Yonsei University, Seoul, South Korea"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2966237"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2961426"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICARCV.2016.7838739"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3301273"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/SSRR.2018.8468611"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.proeng.2012.01.643"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CCDC.2018.8407416"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CCDC.2018.8407136"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-6463-0_24"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2018.2864373"},{"key":"ref10","article-title":"Learning to play with intrinsically-motivated self-aware agents","author":"haber","year":"2018","journal-title":"arXiv 1802 07442"},{"key":"ref40","first-page":"1387","article-title":"Three dimensional optimum controller for multiple UAV formation flight using behavior-based decentralized approach","author":"kim","year":"2007","journal-title":"Proc Control Autom Syst Int Conf (ICCAS)"},{"key":"ref11","article-title":"Self-imitation learning","author":"oh","year":"2018","journal-title":"arXiv 1806 05635"},{"key":"ref12","article-title":"Exploration by random network distillation","author":"burda","year":"2018","journal-title":"arXiv 1810 12894"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref15","article-title":"Prioritized experience replay","author":"schaul","year":"2015","journal-title":"arXiv 1511 05952"},{"key":"ref16","article-title":"Sample efficient actor-critic with experience replay","author":"wang","year":"2016","journal-title":"arXiv 1611 01224"},{"key":"ref17","article-title":"The reactor: A fast and sample-efficient actor-critic agent for reinforcement learning","author":"gruslys","year":"2017","journal-title":"arXiv 1704 04651"},{"key":"ref18","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref19","first-page":"1008","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref28","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref4","article-title":"Count-based exploration with neural density models","author":"ostrovski","year":"2017","journal-title":"arXiv 1703 01310"},{"key":"ref27","article-title":"Learning self-imitating diverse policies","author":"gangwani","year":"2018","journal-title":"arXiv 1805 10309"},{"key":"ref3","first-page":"1471","article-title":"Unifying count-based exploration and intrinsic motivation","author":"bellemare","year":"2016","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref6","article-title":"Modeling purposeful adaptive behavior with the principle of maximum causal entropy","author":"ziebart","year":"2010"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.3390\/electronics7120375"},{"key":"ref5","article-title":"Reinforcement learning with deep energy-based policies","author":"haarnoja","year":"2017","journal-title":"arXiv 1702 08165 [cs]"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1093\/oxfordhb\/9780195399820.013.0010","article-title":"Curiosity and motivation","author":"silvia","year":"2012","journal-title":"The Oxford Handbook of Human Motivation"},{"key":"ref2","article-title":"Playing Atari with deep reinforcement learning","author":"mnih","year":"2013","journal-title":"arXiv 1312 5602"},{"key":"ref9","article-title":"Large-scale study of curiosity-driven learning","author":"burda","year":"2018","journal-title":"arXiv 1808 04355"},{"key":"ref1","author":"franklin","year":"2008","journal-title":"Unmanned combat air vehicles Opportunities for the guided weapons industry"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcss.2007.08.009"},{"key":"ref22","article-title":"Count-based exploration with the successor representation","author":"machado","year":"2018","journal-title":"arXiv 1807 11622"},{"key":"ref21","article-title":"Dora the explorer: Directed outreaching reinforcement action-selection","author":"fox","year":"2018","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref24","article-title":"Parameter space noise for exploration","author":"plappert","year":"2017","journal-title":"arXiv 1706 01905"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.2514\/6.2005-6457"},{"key":"ref23","article-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"stadie","year":"2015","journal-title":"arXiv 1507 00814"},{"key":"ref26","article-title":"Diversity is all you need: Learning skills without a reward function","author":"eysenbach","year":"2018","journal-title":"arXiv 1802 06070"},{"key":"ref25","article-title":"Intrinsic motivation and automatic curricula via asymmetric self-play","author":"sukhbaatar","year":"2017","journal-title":"arXiv 1703 05407"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/8948470\/09301300.pdf?arnumber=9301300","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,8]],"date-time":"2022-12-08T06:54:20Z","timestamp":1670482460000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9301300\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/access.2020.3046284","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}