{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T18:25:25Z","timestamp":1769883925636,"version":"3.49.0"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030864859","type":"print"},{"value":"9783030864866","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86486-6_17","type":"book-chapter","created":{"date-parts":[[2021,9,9]],"date-time":"2021-09-09T15:25:48Z","timestamp":1631201148000},"page":"271-286","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Goal Modelling for Deep Reinforcement Learning Agents"],"prefix":"10.1007","author":[{"given":"Jonathan","family":"Leung","sequence":"first","affiliation":[]},{"given":"Zhiqi","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Zhiwei","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Chunyan","family":"Miao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,10]]},"reference":[{"key":"17_CR1","unstructured":"Andreas, J., Klein, D., Levine, S.: Modular multitask reinforcement learning with policy sketches. In: International Conference on Machine Learning, pp. 166\u2013175 (2017)"},{"key":"17_CR2","unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems, pp. 5048\u20135058 (2017)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Bacon, P.L., Harb, J., Precup, D.: The option-critic architecture. In: Thirty-First AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.10916"},{"issue":"3","key":"17_CR4","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1023\/B:AGNT.0000018806.20944.ef","volume":"8","author":"P Bresciani","year":"2004","unstructured":"Bresciani, P., Perini, A., Giorgini, P., Giunchiglia, F., Mylopoulos, J.: Tropos: an agent-oriented software development methodology. Auton. Agent. Multi-Agent Syst. 8(3), 203\u2013236 (2004). https:\/\/doi.org\/10.1023\/B:AGNT.0000018806.20944.ef","journal-title":"Auton. Agent. Multi-Agent Syst."},{"key":"17_CR5","unstructured":"Chevalier-Boisvert, M.: gym-miniworld environment for openai gym (2018). https:\/\/github.com\/maximecb\/gym-miniworld"},{"key":"17_CR6","unstructured":"Chevalier-Boisvert, M., Willems, L., Pal, S.: Minimalistic gridworld environment for openai gym (2018). https:\/\/github.com\/maximecb\/gym-minigrid"},{"key":"17_CR7","unstructured":"Dayan, P., Hinton, G.E.: Feudal reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 271\u2013278 (1993)"},{"key":"17_CR8","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1613\/jair.639","volume":"13","author":"TG Dietterich","year":"2000","unstructured":"Dietterich, T.G.: Hierarchical reinforcement learning with the MAXQ value function decomposition. J. Artif. Intell. Res. 13, 227\u2013303 (2000)","journal-title":"J. Artif. Intell. Res."},{"key":"17_CR9","unstructured":"Espeholt, L., et al.: IMPALA: scalable distributed deep-RL with importance weighted actor-learner architectures. In: International Conference on Machine Learning, pp. 1407\u20131416. PMLR (2018)"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Gopalan, N., et al.: Planning with abstract Markov decision processes. In: Twenty-Seventh International Conference on Automated Planning and Scheduling (2017)","DOI":"10.1609\/icaps.v27i1.13867"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence, pp. 2094\u20132100 (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"17_CR12","unstructured":"Icarte, R.T., Klassen, T., Valenzano, R., McIlraith, S.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: International Conference on Machine Learning, pp. 2107\u20132116 (2018)"},{"key":"17_CR13","unstructured":"Kaelbling, L.P.: Learning to achieve goals. In: Proceedings of the Thirteenth International Joint Conference on Artificial Intelligence, pp. 1094\u20131099 (1993)"},{"key":"17_CR14","unstructured":"Kolve, E., et al.: AI2-THOR: An Interactive 3D Environment for Visual AI. arXiv (2017)"},{"key":"17_CR15","unstructured":"Kulkarni, T.D., Narasimhan, K., Saeedi, A., Tenenbaum, J.: Hierarchical deep reinforcement learning: integrating temporal abstraction and intrinsic motivation. In: Advances in Neural Information Processing Systems, pp. 3675\u20133683 (2016)"},{"key":"17_CR16","unstructured":"Leike, J., Krueger, D., Everitt, T., Martic, M., Maini, V., Legg, S.: Scalable agent alignment via reward modeling: a research direction. arXiv preprint arXiv:1811.07871 (2018)"},{"key":"17_CR17","unstructured":"Levy, A., Konidaris, G., Platt, R., Saenko, K.: Learning multi-level hierarchies with hindsight. arXiv preprint arXiv:1712.00948 (2017)"},{"issue":"3\u20134","key":"17_CR18","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1007\/BF00992699","volume":"8","author":"LJ Lin","year":"1992","unstructured":"Lin, L.J.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Mach. Learn. 8(3\u20134), 293\u2013321 (1992). https:\/\/doi.org\/10.1007\/BF00992699","journal-title":"Mach. Learn."},{"key":"17_CR19","doi-asserted-by":"crossref","unstructured":"Lyu, D., Yang, F., Liu, B., Gustafson, S.: SDRL: interpretable and data-efficient deep reinforcement learning leveraging symbolic planning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 33, pp. 2970\u20132977 (2019)","DOI":"10.1609\/aaai.v33i01.33012970"},{"key":"17_CR20","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"17_CR21","unstructured":"Nachum, O., Gu, S.S., Lee, H., Levine, S.: Data-efficient hierarchical reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 3303\u20133313 (2018)"},{"key":"17_CR22","unstructured":"Parr, R., Russell, S.J.: Reinforcement learning with hierarchies of machines. In: Advances in Neural Information Processing Systems, pp. 1043\u20131049 (1998)"},{"key":"17_CR23","unstructured":"Plappert, M., et al.: Multi-goal reinforcement learning: challenging robotics environments and request for research. arXiv preprint arXiv:1802.09464 (2018)"},{"key":"17_CR24","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"2014","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, Hoboken (2014)"},{"key":"17_CR25","unstructured":"Roderick, M., Grimm, C., Tellex, S.: Deep abstract q-networks. In: Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems, pp. 131\u2013138 (2018)"},{"key":"17_CR26","unstructured":"Schaul, T., Horgan, D., Gregor, K., Silver, D.: Universal value function approximators. In: International Conference on Machine Learning, pp. 1312\u20131320 (2015)"},{"issue":"4","key":"17_CR27","doi-asserted-by":"publisher","first-page":"1413","DOI":"10.1093\/ietisy\/e89-d.4.1413","volume":"89","author":"Z Shen","year":"2006","unstructured":"Shen, Z., Miao, C., Gay, R., Li, D.: Goal-oriented methodology for agent system development. IEICE Trans. Inf. Syst. 89(4), 1413\u20131420 (2006)","journal-title":"IEICE Trans. Inf. Syst."},{"key":"17_CR28","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"issue":"1\u20132","key":"17_CR29","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999)","journal-title":"Artif. Intell."},{"key":"17_CR30","unstructured":"Toro Icarte, R., Klassen, T.Q., Valenzano, R., McIlraith, S.A.: Teaching multiple tasks to an RL agent using LTL. In: Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems, pp. 452\u2013461 (2018)"},{"key":"17_CR31","doi-asserted-by":"crossref","unstructured":"van Lamsweerde, A.: Goal-oriented requirements engineering: a guided tour. In: Proceedings Fifth IEEE International Symposium on Requirements Engineering, pp. 249\u2013262 (2001)","DOI":"10.1109\/ISRE.2001.948567"},{"key":"17_CR32","unstructured":"Vezhnevets, A.S., et al.: Feudal networks for hierarchical reinforcement learning. arXiv preprint arXiv:1703.01161 (2017)"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"Yu, E.S.: Towards modelling and reasoning support for early-phase requirements engineering. In: Proceedings of ISRE 1997: 3rd IEEE International Symposium on Requirements Engineering, pp. 226\u2013235. IEEE (1997)","DOI":"10.1109\/ISRE.1997.566873"},{"key":"17_CR34","unstructured":"Zhang, A., Sukhbaatar, S., Lerer, A., Szlam, A., Fergus, R.: Composable planning with attributes. In: International Conference on Machine Learning, pp. 5842\u20135851. PMLR (2018)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86486-6_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T22:05:26Z","timestamp":1757369126000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86486-6_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030864859","9783030864866"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86486-6_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"10 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bilbao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2021.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"869","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"210","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held online due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}