{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T05:57:48Z","timestamp":1761976668927,"version":"build-2065373602"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T00:00:00Z","timestamp":1752624000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T00:00:00Z","timestamp":1752624000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,16]]},"DOI":"10.1109\/qrs-c65679.2025.00042","type":"proceedings-article","created":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T17:10:04Z","timestamp":1761930604000},"page":"278-286","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement Learning Based UAV Control Algorithm and Safety Performance Analysis"],"prefix":"10.1109","author":[{"given":"Siwei","family":"Yan","sequence":"first","affiliation":[{"name":"Beihang University, Beijing,Beijing,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CCA.2010.5611206"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.5772\/62128"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.2514\/6.2018-2134"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3061307"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.23919\/ECC51009.2020.9143591"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-77939-9_17"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801306"},{"article-title":"Trial without error: towards safe reinforcement learning via human intervention[J]","year":"2017","author":"Saunders","key":"ref8"},{"key":"ref9","article-title":"Model-assisted Reinforcement Learning of A Quadrotor","author":"Javeed","year":"2023","journal-title":"ARXIV-CS. RO"},{"article-title":"Safe Reinforcement Learning by Imagining the Near Future [J]","year":"2022","author":"Thomas","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/lra.2022.3196132"},{"issue":"9","key":"ref12","first-page":"65","article-title":"Safety Control for UAV Formation Reconfiguration Based on Modified Differential Evolution[J]","volume":"21","author":"Ming-Hai","year":"2014","journal-title":"Optics & Control"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2021.104186"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-022-07808-y"},{"issue":"1","key":"ref15","first-page":"21","article-title":"A review on attack, defense and security analysis of deep reinforcement learning[J]","volume":"48","author":"Chen","year":"2022","journal-title":"Journal of Automation"},{"key":"ref16","first-page":"1861","article-title":"Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor[C]","volume-title":"International conference on machine learning","author":"Haarnoja"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.cja.2023.03.035"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3528416.3530865"},{"first-page":"279","article-title":"Remote Teaching of Dynamics and Control of Robots Using ROS 2[C]","volume-title":"13th IFAC Symposium on Advances in Control Education: ACE 2022","key":"ref19"},{"article-title":"Reward constrained policy optimization[J]","year":"2018","author":"Tessler","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9635857"},{"issue":"1","key":"ref22","first-page":"41","article-title":"A review of the research on safety and security technology of unmanned aerial vehicle operation in low altitude airspace[J]","volume":"43","author":"YANG","year":"2024","journal-title":"Journal of Xihua University (Natural Science Edition)"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/MNET.004.2100666"},{"issue":"S1","key":"ref24","first-page":"361","article-title":"Research on reliability assessment method of reinforcement learning for reactor process control[J]","volume":"44","author":"QIAO","year":"2023","journal-title":"Automation Instrumentation"},{"article-title":"Measuring the reliability of reinforcement learning algorithms[J]","year":"2019","author":"Chan","key":"ref25"},{"article-title":"Lyapunov-stable Neural Control for State and Output Feedback: a Novel Formulation[C]","volume-title":"Forty-first International Conference on Machine Learning","author":"Yang","key":"ref26"}],"event":{"name":"2025 25th International Conference on Software Quality, Reliability, and Security Companion (QRS-C)","start":{"date-parts":[[2025,7,16]]},"location":"Hangzhou, China","end":{"date-parts":[[2025,7,20]]}},"container-title":["2025 25th International Conference on Software Quality, Reliability, and Security Companion (QRS-C)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11216429\/11216154\/11216590.pdf?arnumber=11216590","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T05:55:03Z","timestamp":1761976503000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11216590\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,16]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/qrs-c65679.2025.00042","relation":{},"subject":[],"published":{"date-parts":[[2025,7,16]]}}}