{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T14:14:07Z","timestamp":1778336047269,"version":"3.51.4"},"reference-count":61,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T00:00:00Z","timestamp":1736294400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T00:00:00Z","timestamp":1736294400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s11633-023-1482-0","type":"journal-article","created":{"date-parts":[[2025,1,7]],"date-time":"2025-01-07T19:22:28Z","timestamp":1736277748000},"page":"267-288","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Latent Landmark Graph for Efficient Exploration-exploitation Balance in Hierarchical Reinforcement Learning"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5387-9942","authenticated-orcid":false,"given":"Qingyang","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4905-6569","authenticated-orcid":false,"given":"Hongming","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8251-9118","authenticated-orcid":false,"given":"Dengpeng","family":"Xing","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1111-1529","authenticated-orcid":false,"given":"Bo","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,1,8]]},"reference":[{"key":"1482_CR1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-4095-0","volume-title":"Deep Reinforcement Learning: Fundamentals, Research and Applications","author":"H Dong","year":"2020","unstructured":"H. Dong, Z. L. Ding, S. H. Zhang. Deep Reinforcement Learning: Fundamentals, Research and Applications, Singapore: Springer, 2020. DOI: https:\/\/doi.org\/10.1007\/978-981-15-4095-0."},{"key":"1482_CR2","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/978-981-15-4095-0_3","volume-title":"Deep Reinforcement Learning: Fundamentals, Research and Applications","author":"H M Zhang","year":"2020","unstructured":"H. M. Zhang, T. Y. Yu. Taxonomy of reinforcement learning algorithms. Deep Reinforcement Learning: Fundamentals, Research and Applications, H. Dong, Z. H. Ding, S. H. Zhang, Eds., Singapore: Springer, pp. 125\u2013133, 2020. DOI: https:\/\/doi.org\/10.1007\/978-981-15-4095-0_3."},{"key":"1482_CR3","doi-asserted-by":"publisher","first-page":"6728","DOI":"10.1609\/aaai.v37i6.25825","volume-title":"Proceedings of the 37th AAAI Conference on Artificial Intelligence","author":"F S Bai","year":"2023","unstructured":"F. S. Bai, H. M. Zhang, T. Y. Tao, Z. H. Wu, Y. N. Wang, B. Xu. PiCor: Multi-task deep reinforcement learning with policy correction. In Proceedings of the 37th AAAI Conference on Artificial Intelligence, Washington DC, USA, pp. 6728\u20136736, 2023. DOI: https:\/\/doi.org\/10.1609\/aaai.v37i6.25825."},{"key":"1482_CR4","first-page":"271","volume-title":"Poceedings of International Conference on Neural Information Processing Systems","author":"P Dayan","year":"1992","unstructured":"P. Dayan, G. E. Hinton. Feudal reinforcement learning. In Poceedings of International Conference on Neural Information Processing Systems, Denver, USA, pp. 271\u2013278, 1992."},{"issue":"1\u20132","key":"1482_CR5","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"R S Sutton","year":"1999","unstructured":"R. S. Sutton, D. Precup, S. Smgh. Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence, vol. 112, no. 1\u20132, pp. 181\u2013211, 1999. DOI: https:\/\/doi.org\/10.1016\/S0004-3702(99)00052-1.","journal-title":"Artificial Intelligence"},{"key":"1482_CR6","first-page":"3682","volume-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems","author":"T D Kulkarni","year":"2016","unstructured":"T. D. Kulkarni, K. R. Narasimhan, A. Saeedi, J. B. Tenenbaum. Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation. In Proceedings of the 30th International Conference on Neural Information Processing Systems, Barcelona, Spain, pp. 3682\u20133690, 2016."},{"key":"1482_CR7","first-page":"3540","volume-title":"Proceedings of the 34th International Conference on Machine Learning","author":"A S Vezhnevets","year":"2017","unstructured":"A. S. Vezhnevets, S. Osindero, T. Schaul, N. Heess, M. Jaderberg, D. Silver, K. Kavukcuoglu. Feudal networks for hierarchical reinforcement learning. In Proceedings of the 34th International Conference on Machine Learning, Sydney, Australia, pp. 3540\u20133549, 2017."},{"issue":"11","key":"1482_CR8","doi-asserted-by":"publisher","first-page":"3409","DOI":"10.1109\/TNNLS.2019.2891792","volume":"30","author":"N Dilokthanakul","year":"2019","unstructured":"N. Dilokthanakul, C. Kaplanis, N. Pawlowski, M. Shanahan. Feature control as intrinsic motivation for hierarchical reinforcement learning. IEEE Transactions on Neural Networks and Learning Systems, vol. 30, no. 11, pp. 3409\u20133418, 2019. DOI: https:\/\/doi.org\/10.1109\/TNNLS.2019.2891792.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"1482_CR9","volume-title":"Proceedings of the 7th International Conference on Learning Representations","author":"A Levy","year":"2019","unstructured":"A. Levy, G. D. Konidaris, R. Platt Jr, K. Saenko. Learning multi-level hierarchies with hindsight. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1482_CR10","volume-title":"Proceedings of the 2nd International Conference on Learning Representations","author":"D P Kingma","year":"2014","unstructured":"D. P. Kingma, M. Welling. Auto-encoding variational Bayes. In Proceedings of the 2nd International Conference on Learning Representations, Banff, Canada, 2014."},{"issue":"4","key":"1482_CR11","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1162\/089976602317318938","volume":"14","author":"L Wiskott","year":"2002","unstructured":"L. Wiskott, T. J. Sejnowski. Slow feature analysis: Unsupervised learning of invariances. Neural Computation, vol. 14, no. 4, pp.715\u2013770, 2002. DOI: https:\/\/doi.org\/10.1162\/089976602317318938.","journal-title":"Neural Computation"},{"key":"1482_CR12","volume-title":"Proceedings of the 6th International Conference on Learning Representations","author":"A P\u00e9r\u00e9","year":"2018","unstructured":"A. P\u00e9r\u00e9, S. Forestier, O. Sigaud, P. Y. Oudeyer. Unsupervised learning of goal spaces for intrinsically motivated goal exploration. In Proceedings of the 6th International Conference on Learning Representations, Vancouver, Canada, 2018."},{"key":"1482_CR13","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","author":"S Nasiriany","year":"2019","unstructured":"S. Nasiriany, V. H. Pong, S. Lin, S. Levine. Planning with goal-conditioned policies. In Proceedings of the 33rd International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 1329, 2019."},{"key":"1482_CR14","volume-title":"Proceedings of the 8th International Conference on Learning Representations","author":"S Nair","year":"2020","unstructured":"S. Nair, C. Finn. Hierarchical foresight: Self-supervised learning of long-horizon tasks via visual subgoal generation. In Proceedings of the 8th International Conference on Learning Representations, Addis Ababa, Ethiopia, 2020."},{"key":"1482_CR15","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"S Y Li","year":"2020","unstructured":"S. Y. Li, L. L. Zheng, J. H. Wang, C. J. Zhang. Learning subgoal representations with slow dynamics. In Proceedings of the 9th International Conference on Learning Representations, 2020."},{"key":"1482_CR16","volume-title":"Proceedings of the 10th International Conference on Learning Representations","author":"S Y Li","year":"2022","unstructured":"S. Y. Li, J. Zhang, J. H. Wang, Y. Yu, C. J. Zhang. Active hierarchical exploration with stable subgoal representation learning. In Proceedings of the 10th International Conference on Learning Representations, 2022."},{"key":"1482_CR17","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"L Q Li","year":"2021","unstructured":"L. Q. Li, R Yang, D. J. Luo. FOCAL: Efficient fully-offline meta-reinforcement learning via distance metric learning and behavior regularization. In Proceedings of the 9th International Conference on Learning Representations, 2021."},{"key":"1482_CR18","volume-title":"Scheduled intrinsic drive: A hierarchical take on intrinsically motivated exploration","author":"J W Zhang","year":"2019","unstructured":"J. W. Zhang, N. Wetzel, N. Dorka, J. Boedecker, W. Burgard. Scheduled intrinsic drive: A hierarchical take on intrinsically motivated exploration, [Online], Available: https:\/\/arxiv.org\/abs\/1903.07400, 2019."},{"key":"1482_CR19","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"T R Zhang","year":"2020","unstructured":"T. R. Zhang, S. Q. Guo, T. Tan, X. L. Hu, F. Chen. Generating adjacency-constrained subgoals in hierarchical reinforcement learning. In Proceedings of the 34th International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 1811, 2020."},{"key":"1482_CR20","doi-asserted-by":"publisher","first-page":"408","DOI":"10.1007\/978-3-030-61616-8_3","volume-title":"Proceedings of the 29th International Conference on Artificial Neural Networks","author":"F R\u00f6der","year":"2020","unstructured":"F. R\u00f6der, M. Eppe, P. D. H. Nguyen, S. Wermter. Curious hierarchical actor-critic reinforcement learning. In Proceedings of the 29th International Conference on Artificial Neural Networks, Springer, Bratislava, Slovakia, pp. 408\u2013419, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-61616-8_3."},{"key":"1482_CR21","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","author":"B Eysenbach","year":"2019","unstructured":"B. Eysenbach, R. Salakhutdinov, S. Levine. Search on the replay buffer: Bridging planning and reinforcement learning. In Proceedings of the 33rd International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 1366, 2019."},{"key":"1482_CR22","volume-title":"Learning world graphs to accelerate hierarchical reinforcement learning","author":"W L Shang","year":"2019","unstructured":"W. L. Shang, A. Trott, S. Zheng, C. M. Xiong, R. Socher. Learning world graphs to accelerate hierarchical reinforcement learning, [Online], Available: https:\/\/arxiv.org\/abs\/1907.00664, 2019."},{"key":"1482_CR23","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"S Emmons","year":"2020","unstructured":"S. Emmons, A. Jain, M. Laskin, T. Kurutach, P. Abbeel, D. Pathak. Sparse graphical memory for robust planning. In Proceedings of the 34th International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 441, 2020."},{"key":"1482_CR24","first-page":"12611","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"L J Zhang","year":"2021","unstructured":"L. J. Zhang, G. Yang, B. C. Stadie. World model as a graph: Learning latent landmarks for planning. In Proceedings of the 38th International Conference on Machine Learning, pp. 12611\u201312620, 2021."},{"key":"1482_CR25","doi-asserted-by":"publisher","first-page":"5026","DOI":"10.1109\/IROS.2012.6386109","volume-title":"Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"E Todorov","year":"2012","unstructured":"E. Todorov, T. Erez, Y. Tassa. Mujoco: A physics engine for model-based control. In Proceedings of IEEE\/RSJ International Conference on Intelligent Robots and Systems, IEEE, Vilamoura-Algarve, Portugal, pp. 5026\u20135033, 2012. DOI: https:\/\/doi.org\/10.1109\/IROS.2012.6386109."},{"key":"1482_CR26","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10190993","volume-title":"Proceedings of International Joint Conference on Neural Networks","author":"Q Y Zhang","year":"2023","unstructured":"Q. Y. Zhang, Y. M. Yang, J. Q. Ruan, X. T. Xiong, D. P. Xing, B. Xu. Balancing exploration and exploitation in hierarchical reinforcement learning via latent landmark graphs. In Proceedings of International Joint Conference on Neural Networks, IEEE, Gold Coast, Australia, 2023. DOI: https:\/\/doi.org\/10.1109\/IJCNN54540.2023.10190993."},{"key":"1482_CR27","first-page":"3307","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems","author":"O Nachum","year":"2018","unstructured":"O. Nachum, S. X. Gu, H. Lee, S. Levine. Data-efficient hierarchical reinforcement learning. In Proceedings of the 32nd International Conference on Neural Information Processing Systems, Montreal, Canada, pp. 3307\u20133317, 2018."},{"key":"1482_CR28","first-page":"5055","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","author":"M Andrychowicz","year":"2017","unstructured":"M. Andrychowicz, F. Wolski, A. Ray, J. Schneider, R. Fong, P. Welinder, B. McGrew, J. Tobin, P. Abbeel, W. Zaremba. Hindsight experience replay. In Proceedings of the 31st International Conference on Neural Information Processing Systems, Long Beach, USA, pp.5055\u20135065, 2017."},{"key":"1482_CR29","volume-title":"Why does hierarchy (sometimes) work so well in reinforcement learning?","author":"O Nachum","year":"2019","unstructured":"O. Nachum, H. R. Tang, X. Y. Lu, S. X. Gu, H. Lee, S. Levine. Why does hierarchy (sometimes) work so well in reinforcement learning? [Online], Available: https:\/\/arxiv.org\/abs\/1909.10618, 2019."},{"key":"1482_CR30","volume-title":"Learning goal embeddings via self-play for hierarchical reinforcement learning","author":"S Sukhbaatar","year":"2018","unstructured":"S. Sukhbaatar, E. Denton, A. Szlam, R. Fergus. Learning goal embeddings via self-play for hierarchical reinforcement learning, [Online], Available: https:\/\/arxiv.org\/abs\/1811.09083, 2018."},{"key":"1482_CR31","volume-title":"Proceedings of the 7th International Conference on Learning Representations","author":"D Ghosh","year":"2018","unstructured":"D. Ghosh, A. Gupta, S. Levine. Learning actionable representations with goal conditioned policies. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2018."},{"key":"1482_CR32","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1109\/DEVLRN.2019.8850723","volume-title":"Proceedings of the Joint IEEE 9th International Conference on Development and Learning and Epigenetic Robotics","author":"M B Hafez","year":"2019","unstructured":"M. B. Hafez, C Weber, M. Kerzel, S. Wermter. Efficient intrinsically motivated robotic grasping with learning-adaptive imagination in latent space. In Proceedings of the Joint IEEE 9th International Conference on Development and Learning and Epigenetic Robotics, IEEE, Oslo, Norway, pp. 240\u2013246, 2019. DOI: https:\/\/doi.org\/10.1109\/DEVLRN.2019.8850723."},{"key":"1482_CR33","doi-asserted-by":"publisher","first-page":"815","DOI":"10.1109\/CVPR.2015.7298682","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","author":"F Schroff","year":"2015","unstructured":"F. Schroff, D. Kalenichenko, J. Philbin. FaceNet: A unified embedding for face recognition and clustering. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, IEEE, Boston, USA, pp. 815\u2013823, 2015. DOI: https:\/\/doi.org\/10.1109\/CVPR.2015.7298682"},{"key":"1482_CR34","first-page":"22270","volume-title":"Proceedings of the 36th Advances in Neural Information Processing Systems","author":"R Z Liu","year":"2022","unstructured":"R. Z. Liu, F. S. Bai, Y. L. Du, Y. D. Yang. Meta-rewardnet: Implicitly differentiable reward learning for preference-based reinforcement learning. In Proceedings of the 36th Advances in Neural Information Processing Systems, New Orleans, USA, pp. 22270\u201322284, 2022."},{"key":"1482_CR35","first-page":"4058","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","author":"A Barreto","year":"2017","unstructured":"A. Barreto, W. Dabney, R. Munos, J. J. Hunt, T. Schaul, H. Van Hasselt, D. Silver. Successor features for transfer in reinforcement learning. In Proceedings of the 31st International Conference on Neural Information Processing Systems, Long Beach, USA, pp. 4058\u20134068, 2017."},{"key":"1482_CR36","doi-asserted-by":"publisher","first-page":"5125","DOI":"10.1609\/aaai.v34i04.5955","volume-title":"Proceedings of the 34th AAAI Conference on Artificial Intelligence","author":"M C Machado","year":"2020","unstructured":"M. C. Machado, M. G. Bellemare, M. Bowling. Count-based expiration wrth the successor representation. In Proceedings of the 34th AAAI Conference on Artificial Intelligence, New York, USA, pp. 5125\u20135133, 2020. DOI: https:\/\/doi.org\/10.1609\/aaai.v34i04.5955."},{"key":"1482_CR37","doi-asserted-by":"publisher","DOI":"10.1109\/INDIN45523.2021.9557406","volume-title":"Proceedings of the IEEE 19th International Conference on Industrial Informatics","author":"J Zinn","year":"2021","unstructured":"J. Zinn, B. Vogel-Heuser, F. Schuhmann, L. A. C. Salazar. Hierarchical reinforcement learning for waypoint-based exploration in robotic devices. In Proceedings of the IEEE 19th International Conference on Industrial Informatics, IEEE, Palma de Mallorca, Spain, 2021. DOI: https:\/\/doi.org\/10.1109\/INDIN45523.2021.9557406."},{"issue":"2","key":"1482_CR38","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1007\/s00145-010-9084-8","volume":"24","author":"L Batina","year":"2011","unstructured":"L. Batina, B. Gierlichs, E. Prouff, M. Rivain, F. X. Standaert, N. Veyrat-Charvillon. Mutual information analysis: A comprehensive study. Journal of Cryptology, vol. 24, no. 2, pp. 269\u2013291, 2011. DOI: https:\/\/doi.org\/10.1007\/s00145-010-9084-8.","journal-title":"Journal of Cryptology"},{"key":"1482_CR39","volume-title":"Hierarchical soft actor-critic: Adversarial exploration via mutual information optimization","author":"A Azarafrooz","year":"2019","unstructured":"A. Azarafrooz, J. Brock. Hierarchical soft actor-critic: Adversarial exploration via mutual information optimization, [Online], Available: https:\/\/arxiv.org\/abs\/1906.07122, 2019."},{"key":"1482_CR40","volume-title":"Hierarchical reinforcement learning with abductive planning","author":"K Yamamoto","year":"2018","unstructured":"K. Yamamoto, T. Onishi, Y. Tsuruoka. Hierarchical reinforcement learning with abductive planning, [Online], Available: https:\/\/arxiv.org\/abs\/1806.10792, 2018."},{"issue":"4","key":"1482_CR41","doi-asserted-by":"publisher","first-page":"10216","DOI":"10.1109\/LRA.2022.3190100","volume":"7","author":"J N Li","year":"2022","unstructured":"J. N. Li, C. Tang, M. Tomizuka, W. Zhan. Hierarchical planning through goal-conditioned offline reinforcement learning. IEEE Robotics and Automation Letters, vol. 7, no. 4, pp. 10216\u201310223, 2022. DOI: https:\/\/doi.org\/10.1109\/LRA.2022.3190100.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"1482_CR42","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","author":"Z A Huang","year":"2019","unstructured":"Z. A. Huang, F. C. Liu, H. Su. Mapping state space using landmarks for universal goal reaching. In Proceedings of the 33rd International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 174, 2019."},{"key":"1482_CR43","first-page":"28336","volume-title":"Proceedings of the 34th Advances in Conference on Neural Information Processing Systems","author":"J Kim","year":"2021","unstructured":"J. Kim, Y. Seo, J. Shin. Landmark-guided subgoal generation in hierarchical reinforcement learning. In Proceedings of the 34th Advances in Conference on Neural Information Processing Systems, pp. 28336\u201328349, 2021."},{"key":"1482_CR44","volume-title":"Graph-enhanced exploration for goal-oriented reinforcement learning","author":"J R Jin","year":"2021","unstructured":"J. R. Jin, S. J. Zhou, W. N. Zhang, T. He, Y. Yu, R. Fakoor. Graph-enhanced exploration for goal-oriented reinforcement learning, [Online], Available: https:\/\/openre-view.net\/forum?id=rlYiXFdSy70, 2021."},{"key":"1482_CR45","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"H M Zhang","year":"2023","unstructured":"H. M. Zhang, C. J. Xiao, H. Wang, J. Jin, B. Xu, M. M\u00fcller. Replay memory as an empirical MDP: Combining conservative estimation with experience replay. In Proceedings of the 11th International Conference on Learning Representations, Kigali, Rwanda, 2023."},{"key":"1482_CR46","doi-asserted-by":"publisher","unstructured":"M. B. Hafez, T. Immisch, T. Weber, S. Wermter. Map-based experience replay: A memory-efficient solution to catastrophic forgetting in reinforcement learning. Frontiers in Neurorobotics, vol. 17, Article number 1127642, 2023. DOI: https:\/\/doi.org\/10.3389\/fnbot.2023.1127642.","DOI":"10.3389\/fnbot.2023.1127642"},{"key":"1482_CR47","first-page":"3391","volume-title":"Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics","author":"S Madjiheurem","year":"2019","unstructured":"S. Madjiheurem, L. Toni. Representation learning on graphs: A reinforcement learning application. In Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics, Naha, Japan, pp. 3391\u20133399, 2019."},{"key":"1482_CR48","volume-title":"Graph-based state representation for deep reinforcement learning","author":"V Waradpande","year":"2020","unstructured":"V. Waradpande, D. Kudenko, M. Khosla. Graph-based state representation for deep reinforcement learning, [Online], Available: https:\/\/arxiv.org\/abs\/2004.13965, 2020."},{"key":"1482_CR49","doi-asserted-by":"publisher","unstructured":"F. X. Chen, Y. C. Wang, B. Wang, C. C. J. Kuo. Graph representation learning: A survey. APSIPA Transactions on Signal and Information Processing, vol. 9, Article number e15, 2020. DOI: https:\/\/doi.org\/10.1017\/ATSIP.2020.13.","DOI":"10.1017\/ATSIP.2020.13"},{"key":"1482_CR50","doi-asserted-by":"publisher","unstructured":"Y. Xue, D. Kudenko, M. Khosla. Graph learning-based generation of abstractions for reinforcement learning. Neural Computing and Applications, published online. DOI: https:\/\/doi.org\/10.1007\/s00521-023-08211-x.","DOI":"10.1007\/s00521-023-08211-x"},{"key":"1482_CR51","first-page":"1856","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"T Haarnoja","year":"2018","unstructured":"T. Haarnoja, A. Zhou, P. Abbeel, S. Levine. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In Proceedings of the 35th International Conference on Machine Learning, Stockholm, Sweden, pp. 1856\u20131865, 2018."},{"key":"1482_CR52","first-page":"1312","volume-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning","author":"T Schaul","year":"2015","unstructured":"T. Schaul, D. Horgan, K. Gregor, D. Silver. Universal value function approximators. In Proceedings of the 32nd International Conference on International Conference on Machine Learning, Lille, France, pp. 1312\u20131320, 2015."},{"issue":"9","key":"1482_CR53","doi-asserted-by":"publisher","first-page":"1305","DOI":"10.1109\/83.623193","volume":"6","author":"Y Eldar","year":"1997","unstructured":"Y. Eldar, M. Lindenbaum, M. Porat, Y. Y. Zeevi. The farthest point strategy for progressive image sampling. IEEE Transactions on Image Processing, vol. 6, no. 9, pp. 1305\u20131315, 1997. DOI: https:\/\/doi.org\/10.1109\/83.623193.","journal-title":"IEEE Transactions on Image Processing"},{"key":"1482_CR54","doi-asserted-by":"publisher","DOI":"10.1109\/SiPS55645.2022.9919246","volume-title":"Proceedings of IEEE Workshop on Signal Processing Systems","author":"J T Li","year":"2022","unstructured":"J. T. Li, J. Zhou, Y. Xiong, X. Chen, C. Chakrabarti. An adjustable farthest point sampling method for approximately-sorted point cloud data. In Proceedings of IEEE Workshop on Signal Processing Systems, IEEE, Rennes, France, 2022. DOI: https:\/\/doi.org\/10.1109\/SiPS55645.2022.9919246."},{"key":"1482_CR55","first-page":"5105","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","author":"C R Qi","year":"2017","unstructured":"C. R. Qi, L. Yi, H. Su, L. J. Guibas. Pointnet++: Deep hierarchical feature learning on point sets in a metric space. In Proceedings of the 31st International Conference on Neural Information Processing Systems, Long Beach, USA, pp. 5105\u20135114, 2017."},{"key":"1482_CR56","volume-title":"Proceedings of the 6th International Conference on Learning Representations","author":"V Pong","year":"2018","unstructured":"V. Pong, S. X. Gu, M. Dalal, S. Levine. Temporal difference models: Model-free deep rl for model-based control. In Proceedings of the 6th International Conference on Learning Representations, Vancouver, Canada, 2018."},{"key":"1482_CR57","first-page":"2750","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","author":"H R Tang","year":"2017","unstructured":"H. R. Tang, R. Houthooft, D. Foote, A. Stooke, X. Chen, Y. Duan, J. Schulman, F. De Turck, P. Abbeel. #Exploration: A study of count-based exploration for deep reinforcement learning. In Proceedings of the 31st International Conference on Neural Information Processing Systems, Long Beach, USA, pp. 2750\u20132759, 2017."},{"key":"1482_CR58","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1145\/509907.509965","volume-title":"Proceedings of the 34th Annual ACM Symposium on Theory of Computing","author":"M S Charikar","year":"2002","unstructured":"M. S. Charikar. Similarity estimation techniques from rounding algorithms. In Proceedings of the 34th Annual ACM Symposium on Theory of Computing, ACM, Montreal, Canada, pp. 380\u2013388, 2002. DOI: https:\/\/doi.org\/10.1145\/509907.509965."},{"issue":"1","key":"1482_CR59","doi-asserted-by":"publisher","first-page":"98","DOI":"10.2307\/3213263","volume":"14","author":"A J Lawrance","year":"1977","unstructured":"A. J. Lawrance, P. A. W. Lewis. An exponential moving-average sequence and point process (EMA1). Journal of Applied Probability, vol. 14, no. 1, pp. 98\u2013113, 1977. DOI: https:\/\/doi.org\/10.2307\/3213263.","journal-title":"Journal of Applied Probability"},{"issue":"5","key":"1482_CR60","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1016\/0376-5075(77)90014-9","volume":"1","author":"J M McQuillan","year":"1977","unstructured":"J. M. McQuillan, D. C. Walden. The ARPA network design decisions. Computer Networks (1976), vol. 1, no. 5, pp. 243\u2013289, 1977. DOI: https:\/\/doi.org\/10.1016\/0376-5075(77)90014-9.","journal-title":"Computer Networks (1976)"},{"issue":"6","key":"1482_CR61","doi-asserted-by":"publisher","first-page":"2064","DOI":"10.1109\/TNNLS.2019.2927869","volume":"31","author":"H R Li","year":"2020","unstructured":"H. R. Li, Q. C. Zhang, D. B. Zhao. Deep reinforcement learning-based automatic exploration for navigation in unknown environment. IEEE Transactions on Neural Networks and Learning Systems, vol. 31, no. 6, pp. 2064\u20132076, 2020. DOI: https:\/\/doi.org\/10.1109\/TNNLS.2019.2927869.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1482-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-023-1482-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1482-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T14:02:24Z","timestamp":1778335344000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-023-1482-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,8]]},"references-count":61,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["1482"],"URL":"https:\/\/doi.org\/10.1007\/s11633-023-1482-0","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"value":"2731-538X","type":"print"},{"value":"2731-5398","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,8]]},"assertion":[{"value":"31 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Bo Xu is an editorial board member for\n                      Machine Intelligence Research\n                      and was not involved in the editorial review, or the decision to publish this article. All authors declare that there are no other competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations of conflict of interest"}}]}}