{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T21:21:52Z","timestamp":1743024112776,"version":"3.40.3"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434204"},{"type":"electronic","value":"9783031434211"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43421-1_29","type":"book-chapter","created":{"date-parts":[[2023,9,17]],"date-time":"2023-09-17T20:37:24Z","timestamp":1694983044000},"page":"489-505","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning Hierarchical Planning-Based Policies from\u00a0Offline Data"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4702-1768","authenticated-orcid":false,"given":"Jan","family":"W\u00f6hlke","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5451-8233","authenticated-orcid":false,"given":"Felix","family":"Schmitt","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1583-3692","authenticated-orcid":false,"given":"Herke","family":"van Hoof","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,18]]},"reference":[{"key":"29_CR1","unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 5048\u20135058 (2017)"},{"key":"29_CR2","doi-asserted-by":"crossref","unstructured":"Bacon, P.L., Harb, J., Precup, D.: The option-critic architecture. In: AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"29_CR3","doi-asserted-by":"crossref","unstructured":"Bain, M., Sammut, C.: A framework for behavioural cloning. In: Machine Intelligence, vol. 15, pp. 103\u2013129 (1995)","DOI":"10.1093\/oso\/9780198538677.003.0006"},{"issue":"1","key":"29_CR4","first-page":"1","volume":"3","author":"LE Baum","year":"1972","unstructured":"Baum, L.E.: An inequality and associated maximization technique in statistical estimation for probabilistic functions of Markov processes. Inequalities 3(1), 1\u20138 (1972)","journal-title":"Inequalities"},{"issue":"2","key":"29_CR5","doi-asserted-by":"publisher","first-page":"3623","DOI":"10.1109\/LRA.2021.3060403","volume":"6","author":"S Christen","year":"2021","unstructured":"Christen, S., Jendele, L., Aksan, E., Hilliges, O.: Learning functionally decomposed hierarchies for continuous control tasks with path planning. IEEE Robot. Autom. Lett. 6(2), 3623\u20133630 (2021)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"29_CR6","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/s10994-016-5580-x","volume":"104","author":"C Daniel","year":"2016","unstructured":"Daniel, C., Van Hoof, H., Peters, J., Neumann, G.: Probabilistic inference for determining options in reinforcement learning. Mach. Learn. 104, 337\u2013357 (2016)","journal-title":"Mach. Learn."},{"key":"29_CR7","first-page":"271","volume":"5","author":"P Dayan","year":"1992","unstructured":"Dayan, P., Hinton, G.E.: Feudal reinforcement learning. Adv. Neural Inf. Process. Syst. (NeurIPS) 5, 271\u2013278 (1992)","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"issue":"1","key":"29_CR8","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster, A.P., Laird, N.M., Rubin, D.B.: Maximum likelihood from incomplete data via the EM algorithm. J. Royal Stat. Soc.: Ser. B (Methodological) 39(1), 1\u201322 (1977)","journal-title":"J. Royal Stat. Soc.: Ser. B (Methodological)"},{"key":"29_CR9","unstructured":"Eysenbach, B., Salakhutdinov, R., Levine, S.: Search on the replay buffer: bridging planning and reinforcement learning. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 32 (2019)"},{"key":"29_CR10","unstructured":"Fox, R., Krishnan, S., Stoica, I., Goldberg, K.: Multi-level discovery of deep options. arXiv preprint arXiv:1703.08294 (2017)"},{"key":"29_CR11","unstructured":"Fox, R., et al.: Hierarchical variational imitation learning of control programs. arXiv preprint arXiv:1912.12612 (2019)"},{"key":"29_CR12","doi-asserted-by":"crossref","unstructured":"Francis, A., et al.: Long-range indoor navigation with PRM-RL. IEEE Trans. Robot. (2020)","DOI":"10.1109\/TRO.2020.2975428"},{"key":"29_CR13","unstructured":"Fujimoto, S., Conti, E., Ghavamzadeh, M., Pineau, J.: Benchmarking batch deep reinforcement learning algorithms. arXiv preprint arXiv:1910.01708 (2019)"},{"key":"29_CR14","first-page":"20132","volume":"34","author":"S Fujimoto","year":"2021","unstructured":"Fujimoto, S., Gu, S.: A minimalist approach to offline reinforcement learning. Adv. Neural Inf. Process. Syst. (NeurIPS) 34, 20132\u201320145 (2021)","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"29_CR15","doi-asserted-by":"crossref","unstructured":"Giammarino, V., Paschalidis, I.: Online Baum-Welch algorithm for hierarchical imitation learning. In: Conference on Decision and Control (CDC), pp. 3717\u20133722. IEEE (2021)","DOI":"10.1109\/CDC45484.2021.9683044"},{"issue":"3","key":"29_CR16","doi-asserted-by":"publisher","first-page":"5097","DOI":"10.1109\/LRA.2021.3071062","volume":"6","author":"R Gieselmann","year":"2021","unstructured":"Gieselmann, R., Pokorny, F.T.: Planning-augmented hierarchical reinforcement learning. IEEE Robot. Autom. Lett. 6(3), 5097\u20135104 (2021)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"29_CR17","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning (ICML), pp. 1861\u20131870 (2018)"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"Huber, P.J.: Robust estimation of a location parameter. Ann. Math. Stat., 73\u2013101 (1964)","DOI":"10.1214\/aoms\/1177703732"},{"issue":"1","key":"29_CR19","doi-asserted-by":"publisher","first-page":"172","DOI":"10.3390\/make4010009","volume":"4","author":"M Hutsebaut-Buysse","year":"2022","unstructured":"Hutsebaut-Buysse, M., Mets, K., Latr\u00e9, S.: Hierarchical reinforcement learning: a survey and open research challenges. Mach. Learn. Knowl. Extract. 4(1), 172\u2013221 (2022)","journal-title":"Mach. Learn. Knowl. Extract."},{"key":"29_CR20","unstructured":"Jing, M., et al.: Adversarial option-aware hierarchical imitation learning. In: International Conference on Machine Learning (ICML), pp. 5097\u20135106 (2021)"},{"key":"29_CR21","unstructured":"Kingma, D.P., Ba, J.: ADAM: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"29_CR22","unstructured":"Krishnan, S., Fox, R., Stoica, I., Goldberg, K.: DDCO: discovery of deep continuous options for robot learning from demonstrations. In: Conference on Robot Learning (CoRL), pp. 418\u2013437 (2017)"},{"key":"29_CR23","first-page":"1179","volume":"33","author":"A Kumar","year":"2020","unstructured":"Kumar, A., Zhou, A., Tucker, G., Levine, S.: Conservative Q-learning for offline reinforcement learning. Adv. Neural Inf. Process. Syst. (NeurIPS) 33, 1179\u20131191 (2020)","journal-title":"Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"29_CR24","unstructured":"Le, H., Jiang, N., Agarwal, A., Dud\u00edk, M., Yue, Y., Daum\u00e9 III, H.: Hierarchical imitation and reinforcement learning. In: International Conference on Machine Learning (ICML), pp. 2917\u20132926 (2018)"},{"key":"29_CR25","unstructured":"Levy, A., Konidaris, G., Platt, R., Saenko, K.: Learning multi-level hierarchies with hindsight. In: International Conference on Learning Representations (ICLR) (2019)"},{"key":"29_CR26","doi-asserted-by":"crossref","unstructured":"Li, B., Li, J., Lu, T., Cai, Y., Wang, S.: Hierarchical learning from demonstrations for long-horizon tasks. In: International Conference on Robotics and Automation (ICRA), pp. 4545\u20134551. IEEE (2021)","DOI":"10.1109\/ICRA48506.2021.9561408"},{"issue":"7540","key":"29_CR27","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"},{"key":"29_CR28","unstructured":"Nardelli, N., Synnaeve, G., Lin, Z., Kohli, P., Torr, P.H., Usunier, N.: Value propagation networks. In: International Conference on Learning Representations (ICLR) (2019)"},{"key":"29_CR29","doi-asserted-by":"crossref","unstructured":"Prudencio, R.F., Maximo, M.R., Colombini, E.L.: A survey on offline reinforcement learning: taxonomy, review, and open problems. IEEE Trans. Neural Netw. Learn. Syst. (2023)","DOI":"10.1109\/TNNLS.2023.3250269"},{"key":"29_CR30","unstructured":"Salakhutdinov, R., Roweis, S.T., Ghahramani, Z.: Optimization with EM and expectation-conjugate-gradient. In: International Conference on Machine Learning (ICML), pp. 672\u2013679 (2003)"},{"key":"29_CR31","unstructured":"Smith, M., Van Hoof, H., Pineau, J.: An inference-based policy gradient method for learning options. In: International Conference on Machine Learning (ICML), pp. 4703\u20134712. PMLR (2018)"},{"issue":"1\u20132","key":"29_CR32","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999)","journal-title":"Artif. Intell."},{"key":"29_CR33","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: A physics engine for model-based control. In: International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386109"},{"key":"29_CR34","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double Q-learning. In: AAAI Conference on Artificial Intelligence, vol. 30 (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"29_CR35","unstructured":"Vecerik, M., et al.: Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards. arXiv preprint arXiv:1707.08817 (2017)"},{"key":"29_CR36","unstructured":"Vezhnevets, A.S., et al.: FeUdal networks for hierarchical reinforcement learning. In: International Conference on Machine Learning (ICML), pp. 3540\u20133549 (2017)"},{"key":"29_CR37","doi-asserted-by":"crossref","unstructured":"W\u00f6hlke, J., Schmitt, F., Van Hoof, H.: Hierarchies of planning and reinforcement learning for robot navigation. In: International Conference on Robotics and Automation (ICRA), pp. 10682\u201310688. IEEE (2021)","DOI":"10.1109\/ICRA48506.2021.9561151"},{"key":"29_CR38","doi-asserted-by":"crossref","unstructured":"W\u00f6hlke, J., Schmitt, F., Van Hoof, H.: Value refinement network (VRN). In: International Joint Conference on Artificial Intelligence (IJCAI), pp. 3558\u20133565 (2022)","DOI":"10.24963\/ijcai.2022\/494"},{"key":"29_CR39","unstructured":"Zhang, Z., Paschalidis, I.: Provable hierarchical imitation learning via EM. In: International Conference on Artificial Intelligence and Statistics (AISTATS), pp. 883\u2013891 (2021)"},{"key":"29_CR40","doi-asserted-by":"crossref","unstructured":"Zheng, B., Verma, S., Zhou, J., Tsang, I.W., Chen, F.: Imitation learning: progress, taxonomies and challenges. IEEE Trans. Neural Netw. Learn. Syst., 1\u201316 (2022)","DOI":"10.1109\/TNNLS.2022.3213246"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43421-1_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T06:58:30Z","timestamp":1730098710000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43421-1_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434204","9783031434211"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43421-1_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"18 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"We did not collect or process any personal data, for this work. All data was collected using a physics simulation of a point mass agent. There are several possible future applications for our research, like, for example, in autonomous vehicles or robotics, which hopefully have a positive impact on society. There are, however, also risks of negative societal impact, through the form of application itself, the impact on the job market, or real-world application without proper verification and validation. Such factors should be taken into consideration when designing applications.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}