{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:36:07Z","timestamp":1730298967505,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,4]],"date-time":"2022-12-04T00:00:00Z","timestamp":1670112000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,4]],"date-time":"2022-12-04T00:00:00Z","timestamp":1670112000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,4]]},"DOI":"10.1109\/ssci51031.2022.10022285","type":"proceedings-article","created":{"date-parts":[[2023,1,30]],"date-time":"2023-01-30T20:05:39Z","timestamp":1675109139000},"page":"708-714","source":"Crossref","is-referenced-by-count":1,"title":["Learning Landmark-Oriented Subgoals for Visual Navigation Using Trajectory Memory"],"prefix":"10.1109","author":[{"given":"Jia","family":"Qu","sequence":"first","affiliation":[{"name":"Advanced Technology R&#x0026;D Center Mitsubishi Electric Corporation,Amagasaki,Hyogo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shotaro","family":"Miwa","sequence":"additional","affiliation":[{"name":"Advanced Technology R&#x0026;D Center Mitsubishi Electric Corporation,Amagasaki,Hyogo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yukiyasu","family":"Domae","sequence":"additional","affiliation":[{"name":"Industrial Cyber-Physical Systems Research Center National Institute of Advanced Industrial Science and Technology,Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"issue":"1","key":"ref1","first-page":"181","article-title":"Between MDPs and Semi-MDPs: A Framework for Temporal Abstraction in RL","volume":"1","author":"Sutton","year":"1999","journal-title":"Statew. Agric. L. Use Baseline 2015"},{"key":"ref2","article-title":"Temporal abstraction in reinforcement learning","author":"Precup","year":"2000","journal-title":"University of Massachusetts Amherst"},{"key":"ref3","first-page":"271","article-title":"Feudal reinforcement learning","author":"Dayan","year":"1992","journal-title":"Advances in Neural Information Processing Systems 5 (NIPS 1992)"},{"key":"ref4","article-title":"Stochastic neural networks for hierarchical reinforcement learning","author":"Florensa","year":"2017","journal-title":"arXiv Prepr."},{"key":"ref5","first-page":"4344","article-title":"Learning by playing solving sparse reward tasks from scratch","volume-title":"International conference on machine learning","author":"Riedmiller","year":"2018"},{"volume-title":"Composing Complex Skills by Learning Transition Policies with Proximity Reward Induction","year":"2019","author":"Lee","key":"ref6"},{"journal-title":"Learning an embedding space for transferable robot skills","year":"2018","author":"Hausman","key":"ref7"},{"volume-title":"Learning to Coordinate Manipulation Skills via Skill Behavior Diversification","year":"2020","author":"Lee","key":"ref8"},{"key":"ref9","article-title":"Hierarchical reinforcement learning for zero-shot generalization with subtask dependencies","volume":"31","author":"Sohn","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref10","first-page":"226","article-title":"Hierarchical Policy Gradient Algorithms","volume-title":"Proceedings of the Twentieth International Conference on International Conference on Machine Learning","author":"Ghavamzadeh","year":"2003"},{"key":"ref11","article-title":"Data-efficient hierarchical reinforcement learning","volume":"31","author":"Nachum","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref12","article-title":"Hierarchical Reinforcement Learning with Hindsight","author":"Levy","year":"2018","journal-title":"arXiv"},{"key":"ref13","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume-title":"International Conference on Machine Learning","author":"Vezhnevets","year":"2017"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/3116.003.0027"},{"key":"ref15","article-title":"Reinforcement learning with long short-term memory","volume":"14","author":"Bakker","year":"2001","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref16","article-title":"Neural map: Structured memory for deep reinforcement learning","author":"Parisotto","year":"2017","journal-title":"arXiv Prepr."},{"key":"ref17","first-page":"1","article-title":"Semi-parametric topological memory for navigation","volume-title":"6th Int. Conf. Learn. Represent. ICLR 2018 - Conf. Track Proc.","author":"Savinov","year":"2018"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/icra.2016.7487174"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01236-7"},{"key":"ref20","article-title":"Unifying map and landmark based representations for visual navigation","author":"Gupta","year":"2017","journal-title":"arXiv Prepr."},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00884"},{"key":"ref22","article-title":"Neural map: Structured memory for deep reinforcement learning","author":"Parisotto","year":"2017","journal-title":"arXiv Prepr."},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1093\/jigpal\/jzp049"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2019.00063"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1162\/089892998562861"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(02)01961-7"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-008-9209-6"},{"key":"ref28","first-page":"4067","article-title":"Control of memory, active perception, and action in minecraft","volume-title":"33rd International Conference on Machine Learning, ICML 2016","volume":"6","author":"Oh","year":"2016"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr.2018.00884"},{"key":"ref30","article-title":"Neural Machine Translation by Jointly Learning to Align and Translate","volume-title":"3rd International Conference on Learning Representations, ICLR 2015 - Conference Track Proceedings","author":"Bahdanau","year":"2014"},{"key":"ref31","first-page":"1","article-title":"Reinforcement learning with unsupervised auxiliary tasks","volume-title":"5th International Conference on Learning Representations, ICLR 2017 - Conference Track Proceedings","author":"Jaderberg","year":"2017"},{"key":"ref32","article-title":"Deepmind lab","author":"Beattie","year":"2016","journal-title":"arXiv preprint"},{"issue":"11","key":"ref33","article-title":"Visualizing data using t-SNE","volume":"9","author":"Der Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref34","first-page":"2809","article-title":"Graying the black box: Understanding DQNs","volume-title":"33rd Int. Conf. Mach. Learn. ICML 2016","volume":"4","author":"Zahavy","year":"2016"}],"event":{"name":"2022 IEEE Symposium Series on Computational Intelligence (SSCI)","start":{"date-parts":[[2022,12,4]]},"location":"Singapore, Singapore","end":{"date-parts":[[2022,12,7]]}},"container-title":["2022 IEEE Symposium Series on Computational Intelligence (SSCI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10022049\/10022014\/10022285.pdf?arnumber=10022285","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,14]],"date-time":"2024-03-14T02:52:41Z","timestamp":1710384761000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10022285\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,4]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/ssci51031.2022.10022285","relation":{},"subject":[],"published":{"date-parts":[[2022,12,4]]}}}