{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:40:11Z","timestamp":1755841211795,"version":"3.44.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T00:00:00Z","timestamp":1751932800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,7,8]]},"DOI":"10.23919\/acc63710.2025.11107738","type":"proceedings-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:17:51Z","timestamp":1755800271000},"page":"200-205","source":"Crossref","is-referenced-by-count":0,"title":["End-to-end Reinforcement Learning for Autonomous Racing: Bridging the sim-to-real gap"],"prefix":"10.23919","author":[{"given":"Csan\u00e1d","family":"Budai","sequence":"first","affiliation":[{"name":"Hungarian Research Network (HUN-REN),Institute for Computer Science and Control (SZTAKI),Budapest,Hungary,H-1111"}]},{"given":"Tam\u00e1s","family":"Sz\u00e9les","sequence":"additional","affiliation":[{"name":"Hungarian Research Network (HUN-REN),Institute for Computer Science and Control (SZTAKI),Budapest,Hungary,H-1111"}]},{"given":"Bal\u00e1zs","family":"N\u00e9meth","sequence":"additional","affiliation":[{"name":"Hungarian Research Network (HUN-REN),Institute for Computer Science and Control (SZTAKI),Budapest,Hungary,H-1111"}]},{"given":"P\u00e9ter","family":"G\u00e1sp\u00e1r","sequence":"additional","affiliation":[{"name":"Hungarian Research Network (HUN-REN),Institute for Computer Science and Control (SZTAKI),Budapest,Hungary,H-1111"}]}],"member":"263","reference":[{"article-title":"Reinforcement learning: an introduction, 2nd edn. adaptive computation and machine learning","year":"2018","author":"Sutton","key":"ref1"},{"article-title":"Learning to walk via deep reinforcement learning","year":"2018","author":"Haarnoja","key":"ref2"},{"article-title":"Solving rubik\u2019s cube with a robot hand","year":"2019","author":"Akkaya","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TVT.2024.3394699"},{"article-title":"A survey of imitation learning: Algorithms, recent developments, and challenges","year":"2023","author":"Zare","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3024655"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1126\/science.153.3731.34"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ssci47803.2020.9308468"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/IV55152.2023.10186780"},{"key":"ref11","first-page":"305","article-title":"Alvinn: an autonomous land vehicle in a neural network","volume-title":"Proceedings of the 1st International Conference on Neural Information Processing Systems","author":"Pomerleau"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICAA52185.2022.00011"},{"article-title":"Bypassing the simulation-to-reality gap: Online reinforcement learning using a supervisor","year":"2022","author":"Evans","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3339568"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2019.2942989"},{"key":"ref16","first-page":"2817","article-title":"Robust adversarial reinforcement learning","volume-title":"International conference on machine learning","author":"Pinto"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.adi8022"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.jtte.2023.09.005"},{"key":"ref19","first-page":"77","article-title":"F1TENTH: An Open-source Evaluation Environment for Continuous Control and Reinforcement Learning","volume-title":"Proceedings of the NeurIPS 2019 Competition and Demonstration Track","volume":"123","author":"O\u2019Kelly"},{"article-title":"Emergent tool use from multi-agent autocurricula","year":"2019","author":"Baker","key":"ref20"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref21"},{"key":"ref22","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"12","author":"Sutton","year":"1999","journal-title":"Advances in neural information processing systems"},{"issue":"268","key":"ref23","first-page":"1","article-title":"Stable-baselines3: Reliable reinforcement learning implementations","volume-title":"Journal of Machine Learning Research","volume":"22","author":"Raffin","year":"2021"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/PIC50277.2020.9350833"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CCNC49033.2022.9700730"},{"article-title":"The \u201ddisparity extender\" algorithm, and f1\/tenth","year":"2019","author":"Otterness","key":"ref26"}],"event":{"name":"2025 American Control Conference (ACC)","start":{"date-parts":[[2025,7,8]]},"location":"Denver, CO, USA","end":{"date-parts":[[2025,7,10]]}},"container-title":["2025 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11107441\/11107442\/11107738.pdf?arnumber=11107738","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:24:44Z","timestamp":1755840284000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11107738\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,8]]},"references-count":26,"URL":"https:\/\/doi.org\/10.23919\/acc63710.2025.11107738","relation":{},"subject":[],"published":{"date-parts":[[2025,7,8]]}}}