{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T20:28:11Z","timestamp":1778012891101,"version":"3.51.4"},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Control Syst. Lett."],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/lcsys.2024.3349511","type":"journal-article","created":{"date-parts":[[2024,1,3]],"date-time":"2024-01-03T19:41:44Z","timestamp":1704310904000},"page":"121-126","source":"Crossref","is-referenced-by-count":10,"title":["Developing Driving Strategies Efficiently: A Skill-Based Hierarchical Reinforcement Learning Approach"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3367-7495","authenticated-orcid":false,"given":"Yigit","family":"Gurses","sequence":"first","affiliation":[{"name":"Department of Computer Engineering, Bilkent University, Ankara, Turkey"}]},{"given":"Kaan","family":"Buyukdemirci","sequence":"additional","affiliation":[{"name":"Department of Electrical and Electronics Engineering, Bilkent University, Ankara, Turkey"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6270-5354","authenticated-orcid":false,"given":"Yildiray","family":"Yildiz","sequence":"additional","affiliation":[{"name":"Department of Mechanical Engineering, Bilkent University, Ankara, Turkey"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Dota 2 with large scale deep reinforcement learning","author":"Berner","year":"2019","journal-title":"arXiv:1912.06680"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature24270"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2018.8500556"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2019.8917306"},{"key":"ref8","article-title":"Autonomous vehicle control via deep reinforcement learning","author":"Kardell","year":"2017"},{"key":"ref9","first-page":"359","article-title":"Urban driving with multi-objective deep reinforcement learning","volume-title":"Proc. 18th Int. Conf. Auton. Agents MultiAgent Syst.","author":"Li"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3054625"},{"key":"ref11","article-title":"Designing rewards for fast learning","author":"Sowerby","year":"2022","journal-title":"arXiv:2205.15400"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3090364"},{"key":"ref13","first-page":"1","article-title":"Learning macro-actions in reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Randlov"},{"key":"ref14","article-title":"FeUdal networks for hierarchical reinforcement learning","author":"Vezhnevets","year":"2017","journal-title":"arXiv:1703.01161"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CCTA48906.2021.9658815"},{"key":"ref17","article-title":"Kickstarting deep reinforcement learning","author":"Schmitt","year":"2018","journal-title":"arXiv:1803.03835"},{"key":"ref18","article-title":"Hierarchical Kickstarting for skill transfer in reinforcement learning","author":"Matthews","year":"2022","journal-title":"arXiv:2207.11584"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-30164-8_363"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3453160"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2018.8500368"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1049\/iet-its.2019.0317"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00172"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9564634"},{"key":"ref25","article-title":"Behavior from the void: Unsupervised active pre-training","author":"Liu","year":"2021","journal-title":"arXiv:2103.04551"},{"key":"ref26","article-title":"Efficient exploration via state marginal matching","author":"Lee","year":"2020","journal-title":"arXiv:1906.05274"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref28","article-title":"Reinforcement learning with prototypical representations","author":"Yarats","year":"2021","journal-title":"arXiv:2102.11271"},{"key":"ref29","article-title":"Dynamics-aware unsupervised discovery of skills","author":"Sharma","year":"2020","journal-title":"arXiv:1907.01657"},{"key":"ref30","article-title":"Diversity is all you need: Learning skills without a reward function","author":"Eysenbach","year":"2018","journal-title":"arXiv:1802.06070"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-70093-9_50"},{"key":"ref32","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"Haarnoja","year":"2018","journal-title":"arXiv:1801.01290"},{"key":"ref33","volume-title":"Interstate 80 Freeway Dataset, FHWA-HRT-06-137","author":"Halkias","year":"2006"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.trb.2015.06.010"},{"key":"ref35","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Glorot"}],"container-title":["IEEE Control Systems Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7782633\/10411713\/10380124.pdf?arnumber=10380124","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T01:53:52Z","timestamp":1706752432000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10380124\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/lcsys.2024.3349511","relation":{},"ISSN":["2475-1456"],"issn-type":[{"value":"2475-1456","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}