{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T14:25:54Z","timestamp":1774448754331,"version":"3.50.1"},"reference-count":32,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001381","name":"National Research Foundation, Singapore through its AI Singapore Program","doi-asserted-by":"publisher","award":["AISG2-RP-2020-019"],"award-info":[{"award-number":["AISG2-RP-2020-019"]}],"id":[{"id":"10.13039\/501100001381","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Singapore Ministry of Education (MOE) Academic Research Fund (AcRF) Tier-1","award":["19-C220-SMU-023"],"award-info":[{"award-number":["19-C220-SMU-023"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. 
Learning Syst."],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1109\/tnnls.2021.3087733","type":"journal-article","created":{"date-parts":[[2021,6,22]],"date-time":"2021-06-22T19:40:42Z","timestamp":1624390842000},"page":"7778-7790","source":"Crossref","is-referenced-by-count":31,"title":["End-to-End Hierarchical Reinforcement Learning With Integrated Subgoal Discovery"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6507-4479","authenticated-orcid":false,"given":"Shubham","family":"Pateria","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Nanyang Technological University, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9774-0264","authenticated-orcid":false,"given":"Budhitama","family":"Subagdja","sequence":"additional","affiliation":[{"name":"School of Computing and Information Systems, Singapore Management University, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0378-4069","authenticated-orcid":false,"given":"Ah-Hwee","family":"Tan","sequence":"additional","affiliation":[{"name":"School of Computing and Information Systems, Singapore Management University, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7313-4339","authenticated-orcid":false,"given":"Chai","family":"Quek","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanyang Technological University, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref32","first-page":"15246","article-title":"Search on the replay buffer: Bridging planning and reinforcement learning","volume":"32","author":"eysenbach","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref31","first-page":"1","article-title":"Semi-parametric topological memory for 
navigation","author":"savinov","year":"2018","journal-title":"Proc 6th Int Conf Learn Represent (ICLR)"},{"key":"ref30","article-title":"Fast marching farthest point sampling","author":"moenning","year":"2003"},{"key":"ref10","first-page":"1963","article-title":"Hierarchical reinforcement learning with integrated discovery of salient subgoals","author":"pateria","year":"2020","journal-title":"Proc 19th Int Conf Auto Agents Multiagent Syst (AAMAS)"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref12","first-page":"5048","article-title":"Hindsight experience replay","volume":"30","author":"andrychowicz","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/COGINF.2007.4341927"},{"key":"ref14","first-page":"1497","article-title":"Skill characterization based on betweenness","author":"şimşek","year":"2008","journal-title":"Proc 21st Int Conf Neural Inf Process Syst (NIPS)"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015355"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-36755-1_25"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1102351.1102454"},{"key":"ref18","first-page":"2295","article-title":"A Laplacian framework for option discovery in reinforcement learning","volume":"70","author":"machado","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn (ICML)"},{"key":"ref19","article-title":"Option discovery in hierarchical reinforcement learning using spatio-temporal clustering","author":"srinivas","year":"2016","journal-title":"arXiv 1605 05359"},{"key":"ref28","article-title":"On the bottleneck concept for options discovery: Theoretical underpinnings and extension in continuous state 
spaces","author":"bacon","year":"2014"},{"key":"ref4","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume":"70","author":"vezhnevets","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn (ICML)"},{"key":"ref27","first-page":"1","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"Proc 4th Int Conf Learn Represent (ICLR)"},{"key":"ref3","first-page":"3307","article-title":"Data-efficient hierarchical reinforcement learning","author":"nachum","year":"2018","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst (NIPS)"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2996209"},{"key":"ref29","first-page":"1479","article-title":"Unifying count-based exploration and intrinsic motivation","author":"bellemare","year":"2016","journal-title":"Proc 30th Int Conf Neural Inf Process Syst (NIPS)"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2891792"},{"key":"ref8","first-page":"271","article-title":"Feudal reinforcement learning","volume":"5","author":"dayan","year":"1993","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2805379"},{"key":"ref2","first-page":"1","article-title":"Learning multi-level hierarchies with hindsight","author":"levy","year":"2019","journal-title":"Proc 7th Int Conf Learn Represent (ICLR)"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022140919877"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2608001"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2014.2309437"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.4316\/AECE.2017.04010"},{"key":"ref24","first-page":"1312","article-title":"Universal value function 
approximators","volume":"37","author":"schaul","year":"2015","journal-title":"Proc 32nd Int Conf Mach Learn"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2018.2866869"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"key":"ref25","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","volume":"48","author":"duan","year":"2016","journal-title":"Mach Learn Res"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/9966944\/09462536.pdf?arnumber=9462536","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,19]],"date-time":"2022-12-19T19:54:29Z","timestamp":1671479669000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9462536\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12]]},"references-count":32,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2021.3087733","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12]]}}}