{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T16:37:01Z","timestamp":1757608621497,"version":"3.44.0"},"reference-count":52,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62306325"],"award-info":[{"award-number":["62306325"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128340","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"11538-11545","source":"Crossref","is-referenced-by-count":0,"title":["V-Pilot: A Velocity Vector Control Agent for Fixed-Wing UAVs from Imperfect Demonstrations"],"prefix":"10.1109","author":[{"given":"Xudong","family":"Gong","sequence":"first","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology,Changsha,Hunan,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dawei","family":"Feng","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology,Changsha,Hunan,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kele","family":"Xu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology,Changsha,Hunan,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xing","family":"Zhou","sequence":"additional","affiliation":[{"name":"College of Intelligence Science and Technology, National University of Defense Technology,Changsha,Hunan,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Si","family":"Zheng","sequence":"additional","affiliation":[{"name":"Qiyuan Lab,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Ding","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology,Changsha,Hunan,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huaimin","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, National University of Defense Technology,Changsha,Hunan,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS54217.2022.9836212"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TAES.2018.2807558"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS54217.2022.9836182"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS51884.2021.9476700"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3301273"},{"key":"ref6","article-title":"Vvc-gym: A fixed-wing uav reinforcement learning environment for multi-goal long-horizon problems","volume-title":"International Conference on Learning Representations","author":"Gong","year":"2025"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS.2019.8798254"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICUS50048.2020.9274858"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3213246"},{"key":"ref11","first-page":"12 163","article-title":"Leverage the average: an analysis of kl regularization in reinforcement learning","volume":"33","author":"Vieillard","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1724-z"},{"key":"ref13","article-title":"Iterative regularized policy optimization with imperfect demonstrations","volume-title":"International Conference on Machine Learning. PMLR","author":"Gong","year":"2024"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/770"},{"key":"ref15","article-title":"Unsupervised control through non-parametric discriminative rewards","volume-title":"International Conference on Learning Representations","author":"Warde-Farley","year":"2018"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.105151"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.88"},{"key":"ref18","article-title":"Autonomous control of simulated fixed wing aircraft using deep reinforcement learning","volume-title":"University of BATH","author":"Rennie","year":"2018"},{"key":"ref19","article-title":"Learning to fly-building an autopilot system based on neural networks and reinforcement learning","volume-title":"FernUniversitat Hagen","author":"Eckstein","year":"2020"},{"volume-title":"fly-craft-examples","year":"2024","author":"Gong","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-023-08232-6"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.2514\/6.2020-0136"},{"key":"ref23","first-page":"1331","article-title":"Curious: intrinsically motivated modular multi-goal reinforcement learning","volume-title":"International conference on machine learning. PMLR","author":"Colas","year":"2019"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS54217.2022.9836131"},{"key":"ref25","article-title":"Learn to fly: Cloning the behavior of a pilot","volume-title":"Universidade do Porto","author":"Medeiros","year":"2021"},{"key":"ref26","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings","author":"Ross","year":"2011"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS48674.2020.9213850"},{"key":"ref28","article-title":"Learn to fly ii: Acrobatic manoeuvres","volume-title":"Universidade do Porto","author":"de Freitas","year":"2022"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI.2017.8280916"},{"key":"ref30","article-title":"Using learning from demonstration to enable automated flight control comparable with experienced human pilots","volume-title":"UCL (University College London)","author":"Baomar","year":"2020"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1201\/9780429489105"},{"key":"ref32","article-title":"Hind-sight experience replay","volume":"30","author":"Andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref33","first-page":"7750","article-title":"Maximum entropy gain exploration for long horizon multi-goal reinforcement learning","volume-title":"International Conference on Machine Learning. PMLR","author":"Pitis","year":"2020"},{"key":"ref34","article-title":"Goal-conditioned on-policy reinforcement learning","volume-title":"The Thirty-eighth Annual Conference on Neural Information Processing Systems","author":"Gong","year":"2024"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-021-10085-1"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5953"},{"key":"ref37","article-title":"Learning montezuma\u2019s revenge from a single demonstration","author":"Salimans","year":"2018","journal-title":"arXiv preprint"},{"key":"ref38","first-page":"465","article-title":"Integrating behavior cloning and reinforcement learning for improved performance in dense and sparse reward environments","volume-title":"Proceedings of the 19th International Conference on Autonomous Agents and MultiAgent Systems","author":"Goecks","year":"2020"},{"key":"ref39","first-page":"10 905","article-title":"Scc: An efficient deep reinforcement learning agent mastering the game of starcraft ii","volume-title":"International conference on machine learning. PMLR","author":"Wang","year":"2021"},{"key":"ref40","article-title":"Data quality in imitation learning","author":"Belkhale","year":"2023","journal-title":"Advances in neural information processing systems"},{"key":"ref41","first-page":"1312","article-title":"Universal value function approximators","volume-title":"International conference on machine learning. PMLR","author":"Schaul","year":"2015"},{"key":"ref42","article-title":"An analysis of frame-skipping in reinforcement learning","author":"Kalyanakrishnan","year":"2021","journal-title":"arXiv preprint"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561138"},{"key":"ref44","article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","author":"Plappert","year":"2018","journal-title":"arXiv preprint"},{"volume-title":"Practical PID control","year":"2006","author":"Visioli","key":"ref45"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1996.8.3.643"},{"key":"ref47","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017","journal-title":"arXiv preprint"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-73003-5_196"},{"volume-title":"imitation: Clean imitation learning implementations","year":"2022","author":"Gleave","key":"ref50"},{"issue":"268","key":"ref51","first-page":"1","article-title":"Stable-baselines3: Reliable reinforcement learning implementations","volume":"22","author":"Raffin","year":"2021","journal-title":"Journal of Machine Learning Research"},{"key":"ref52","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja","year":"2018"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2025,5,19]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128340.pdf?arnumber=11128340","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:03:55Z","timestamp":1756879435000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128340\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":52,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128340","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}