{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:18:18Z","timestamp":1764267498436,"version":"3.46.0"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2024,8,26]],"date-time":"2024-08-26T00:00:00Z","timestamp":1724630400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,26]],"date-time":"2024-08-26T00:00:00Z","timestamp":1724630400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Science Foundation","award":["1837369"],"award-info":[{"award-number":["1837369"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s10994-024-06614-y","type":"journal-article","created":{"date-parts":[[2024,8,26]],"date-time":"2024-08-26T16:20:21Z","timestamp":1724689221000},"page":"7509-7540","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Generalization of temporal logic tasks via future dependent options"],"prefix":"10.1007","volume":"113","author":[{"given":"Duo","family":"Xu","sequence":"first","affiliation":[]},{"given":"Faramarz","family":"Fekri","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,26]]},"reference":[{"key":"6614_CR1","unstructured":"Andreas, J., Klein, D., & Levine, S. (2017). Modular multitask reinforcement learning with policy sketches. In International Conference on Machine Learning (pp. 166\u2013175). PMLR."},{"key":"6614_CR2","unstructured":"Andrychowicz, M., Wolski, F., Ray, A., Schneider, J., Fong, R., Welinder, P., McGrew, B., Tobin, J., Pieter\u00a0Abbeel, O., & Zaremba, W. (2017). Hindsight experience replay. Advances in Neural Information Processing Systems 30."},{"key":"6614_CR3","unstructured":"Araki, B., Li, X., Vodrahalli, K., DeCastro, J., Fry, M., & Rus, D. (2021). The logical options framework. In International Conference on Machine Learning, (pp. 307\u2013317). PMLR."},{"key":"6614_CR4","unstructured":"Asadi, K., Misra, D., & Littman, M. (2018). Lipschitz continuity in model-based reinforcement learning. In International Conference on Machine Learning, (pp. 264\u2013273). PMLR."},{"issue":"1\u20132","key":"6614_CR5","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1016\/S0004-3702(99)00071-5","volume":"116","author":"F Bacchus","year":"2000","unstructured":"Bacchus, F., & Kabanza, F. (2000). Using temporal logics to express search control knowledge for planning. Artificial Intelligence, 116(1\u20132), 123\u2013191.","journal-title":"Artificial Intelligence"},{"key":"6614_CR6","unstructured":"Badia, A.P., Piot, B., Kapturowski, S., Sprechmann, P., Vitvitskyi, A., Guo, Z. D., & Blundell, C. (2020). Agent57: Outperforming the atari human benchmark. In International Conference on Machine Learning (pp. 507\u2013517). PMLR."},{"key":"6614_CR7","doi-asserted-by":"crossref","unstructured":"Camacho, A., Icarte, R. T., Klassen, T. Q., Valenzano, R. A., & McIlraith, S. A. (2019). Ltl and beyond: Formal languages for reward function specification in reinforcement learning. 
In IJCAI.","DOI":"10.24963\/ijcai.2019\/840"},{"key":"6614_CR8","unstructured":"Chane-Sane, E., Schmid, C., & Laptev, I. (2021). Goal-conditioned reinforcement learning with imagined subgoals. In International Conference on Machine Learning (pp. 1430\u20131440). PMLR."},{"key":"6614_CR9","unstructured":"Chrisman, L. (1992). Reinforcement learning with perceptual aliasing: The perceptual distinctions approach. In: AAAI (vol. 1992, pp. 183\u2013188). Citeseer."},{"key":"6614_CR10","unstructured":"Chung, J., Gulcehre, C., Cho, K., & Bengio, Y. (2014). Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprintarXiv:1412.3555."},{"key":"6614_CR11","unstructured":"De\u00a0Giacomo, G., & Vardi, M. Y. (2013) Linear temporal logic and linear dynamic logic on finite traces. In IJCAI\u201913 Proceedings of the Twenty-Third International Joint Conference on Artificial Intelligence (pp. 854\u2013860). Association for Computing Machinery."},{"issue":"3","key":"6614_CR12","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1016\/0005-1098(89)90002-2","volume":"25","author":"CE Garcia","year":"1989","unstructured":"Garcia, C. E., Prett, D. M., & Morari, M. (1989). Model predictive control: Theory and practice-a survey. Automatica, 25(3), 335\u2013348.","journal-title":"Automatica"},{"key":"6614_CR13","unstructured":"Haarnoja, T., Zhou, A., Hartikainen, K., Tucker, G., Ha, S., Tan, J., Kumar, V., Zhu, H., Gupta, A., Abbeel, P., & Levine, S. (2018). Soft actor-critic algorithms and applications. arXiv preprintarXiv:1812.05905."},{"key":"6614_CR14","doi-asserted-by":"crossref","unstructured":"Hengst, F., Fran\u00e7ois-Lavet, V., Hoogendoorn, M., & Harmelen, F. (2022). Reinforcement learning with option machines. In Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, IJCAI-22 (pp. 2909\u20132915). International Joint Conferences on Artificial Intelligence Organization.","DOI":"10.24963\/ijcai.2022\/403"},{"key":"6614_CR15","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511810275","volume-title":"Logic in computer science: Modelling and reasoning about systems","author":"M Huth","year":"2004","unstructured":"Huth, M., & Ryan, M. (2004). Logic in computer science: Modelling and reasoning about systems. Cambridge: Cambridge University Press."},{"key":"6614_CR16","unstructured":"Icarte, R. T., Klassen, T., Valenzano, R., & McIlraith, S. (2018). Using reward machines for high-level task specification and decomposition in reinforcement learning. In International Conference on Machine Learning (pp. 2107\u20132116). PMLR."},{"key":"6614_CR17","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1613\/jair.1.12440","volume":"73","author":"RT Icarte","year":"2022","unstructured":"Icarte, R. T., Klassen, T. Q., Valenzano, R., & McIlraith, S. A. (2022). Reward machines: Exploiting reward function structure in reinforcement learning. Journal of Artificial Intelligence Research, 73, 173\u2013208.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"6614_CR18","unstructured":"Inala, J. P., Ma, Y. J., Bastani, O., Zhang, X., & Solar-Lezama, A. (2021). Safe human-interactive control via shielding. arXiv preprintarXiv:2110.05440."},{"key":"6614_CR19","first-page":"10026","volume":"34","author":"K Jothimurugan","year":"2021","unstructured":"Jothimurugan, K., Bansal, S., Bastani, O., & Alur, R. (2021). Compositional reinforcement learning from logical specifications. 
Advances in Neural Information Processing Systems, 34, 10026\u201310039.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6614_CR20","unstructured":"Kipf, T. N., & Welling, M. (2016). Semi-supervised classification with graph convolutional networks. arXiv preprintarXiv:1609.02907."},{"key":"6614_CR21","doi-asserted-by":"crossref","unstructured":"Kuo, Y.-L., Katz, B., & Barbu, A. (2020). Encoding formulas as deep networks: Reinforcement learning for zero-shot execution of ltl formulas. In 2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS) (pp. 5604\u20135610). IEEE.","DOI":"10.1109\/IROS45743.2020.9341325"},{"issue":"2","key":"6614_CR22","doi-asserted-by":"publisher","first-page":"1232","DOI":"10.1109\/LRA.2019.2894852","volume":"4","author":"P-Y Lajoie","year":"2019","unstructured":"Lajoie, P.-Y., Hu, S., Beltrame, G., & Carlone, L. (2019). Modeling perceptual aliasing in slam via discrete-continuous graphical models. IEEE Robotics and Automation Letters, 4(2), 1232\u20131239.","journal-title":"IEEE Robotics and Automation Letters"},{"key":"6614_CR23","unstructured":"Lambert, N., Pister, K., & Calandra, R. (2022). Investigating compounding prediction errors in learned dynamics models. arXiv preprintarXiv:2203.09637."},{"key":"6614_CR24","unstructured":"Le\u00f3n, B. G., Shanahan, M., & Belardinelli, F. (2021). In a nutshell, the human asked for this: Latent goals for following temporal specifications. In International Conference on Learning Representations."},{"key":"6614_CR25","unstructured":"Le\u00f3n, B. G., Shanahan, M., & Belardinelli, F. (2020). Systematic generalisation through task temporal logic and deep reinforcement learning. arXiv preprintarXiv:2006.08767."},{"issue":"1","key":"6614_CR26","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., & Abbeel, P. (2016). End-to-end training of deep visuomotor policies. The Journal of Machine Learning Research, 17(1), 1334\u20131373.","journal-title":"The Journal of Machine Learning Research"},{"key":"6614_CR27","unstructured":"Li, A. C., Chen, Z., Vaezipoor, P., Klassen, T. Q., Icarte, R. T., & McIlraith, S. A. (2022). Noisy symbolic abstractions for deep rl: A case study with reward machines. In: Deep Reinforcement Learning Workshop NeurIPS 2022."},{"key":"6614_CR28","unstructured":"Li, S., Zhang, J., Wang, J., Yu, Y., & Zhang, C. (2021). Active hierarchical exploration with stable subgoal representation learning. arXiv preprintarXiv:2105.14750."},{"key":"6614_CR29","unstructured":"Littman, M. L., Topcu, U., Fu, J., Isbell, C., Wen, M., MacGlashan, J. (2017). Environment-independent task specifications via gltl. arXiv preprintarXiv:1704.04341."},{"key":"6614_CR30","unstructured":"Liu, J. X., Shah, A., Rosen, E., Konidaris, G., & Tellex, S. (2022). Skill transfer for temporally-extended task specifications. arXiv preprintarXiv:2206.05096."},{"key":"6614_CR31","doi-asserted-by":"crossref","unstructured":"Liu, M., Zhu, M., & Zhang, W. (2022). Goal-conditioned reinforcement learning: Problems and solutions. arXiv preprintarXiv:2201.08299.","DOI":"10.24963\/ijcai.2022\/770"},{"issue":"7540","key":"6614_CR32","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A. A., Veness, J., Bellemare, M. G., Graves, A., Riedmiller, M., Fidjeland, A. K., & Ostrovski, G. (2015). 
Human-level control through deep reinforcement learning. Nature, 518(7540), 529\u2013533.","journal-title":"Nature"},{"key":"6614_CR33","doi-asserted-by":"crossref","unstructured":"Pnueli, A. (1977). The temporal logic of programs. In 18th Annual Symposium on Foundations of Computer Science (sfcs 1977) (pp. 46\u201357). IEEE.","DOI":"10.1109\/SFCS.1977.32"},{"key":"6614_CR34","unstructured":"Ray, A., Achiam, J., & Amodei, D. (2019). Benchmarking safe exploration in deep reinforcement learning. arXiv preprintarXiv:1910.017087, 1."},{"key":"6614_CR35","doi-asserted-by":"crossref","unstructured":"Schlichtkrull, M., Kipf, T. N., Bloem, P., Berg, R.v.d., Titov, I., & Welling, M. (2018). Modeling relational data with graph convolutional networks. In European Semantic Web Conference (pp. 593\u2013607). Springer.","DOI":"10.1007\/978-3-319-93417-4_38"},{"key":"6614_CR36","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347."},{"key":"6614_CR37","volume-title":"A survey of model-based and model-free methods for resolving perceptual aliasing","author":"G Shani","year":"2004","unstructured":"Shani, G. (2004). A survey of model-based and model-free methods for resolving perceptual aliasing. Ben-Gurion University."},{"key":"6614_CR38","unstructured":"Shani, G., & Brafman, R. (2004). Resolving perceptual aliasing in the presence of noisy sensors. Advances in Neural Information Processing Systems, 17."},{"key":"6614_CR39","unstructured":"Sohn, S., Oh, J., Lee, H. (2018). Hierarchical reinforcement learning for zero-shot generalization with subtask dependencies. Advances in Neural Information Processing Systems, 31."},{"key":"6614_CR40","unstructured":"Sun, S.-H., Wu, T.-L., & Lim, J. J. (2019). Program guided agent. In International Conference on Learning Representations."},{"key":"6614_CR41","volume-title":"Reinforcement learning: An introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: An introduction. MIT Press."},{"issue":"1\u20132","key":"6614_CR42","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R. S., Precup, D., & Singh, S. (1999). Between mdps and semi-mdps: A framework for temporal abstraction in reinforcement learning. Artificial Intelligence, 112(1\u20132), 181\u2013211.","journal-title":"Artificial Intelligence"},{"key":"6614_CR43","unstructured":"Taylor, M. E., Stone, P. (2009). Transfer learning for reinforcement learning domains: A survey. Journal of Machine Learning Research, 10(7)."},{"key":"6614_CR44","unstructured":"Toro\u00a0Icarte, R., Klassen, T. Q., Valenzano, R., McIlraith, S. A. (2018). Teaching multiple tasks to an rl agent using ltl. In Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems (pp. 452\u2013461)."},{"key":"6614_CR45","unstructured":"Vaezipoor, P., Li, A .C., Icarte, R. A. T., & Mcilraith, S. A. (2021). Ltl2action: Generalizing ltl instructions for multi-task rl. In: International Conference on Machine Learning (pp. 10497\u201310508). PMLR."},{"key":"6614_CR46","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt, H., Guez, A., & Silver, D. (2016). Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (vol. 
30).","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"6614_CR47","unstructured":"Xu, D., & Fekri, F. (2022). Generalizing ltl instructions via future dependent options. arXiv preprint arXiv:2212.04576."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06614-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-024-06614-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06614-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:13:31Z","timestamp":1764267211000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-024-06614-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,26]]},"references-count":47,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["6614"],"URL":"https:\/\/doi.org\/10.1007\/s10994-024-06614-y","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2024,8,26]]},"assertion":[{"value":"24 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 May 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 August 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 August 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}