{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T23:08:50Z","timestamp":1774739330327,"version":"3.50.1"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031477041","type":"print"},{"value":"9783031477058","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,11,6]],"date-time":"2023-11-06T00:00:00Z","timestamp":1699228800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,6]],"date-time":"2023-11-06T00:00:00Z","timestamp":1699228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-47705-8_14","type":"book-chapter","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T09:02:24Z","timestamp":1699606944000},"page":"257-276","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Reinforcement Learning Under Partial Observability Guided by\u00a0Learned Environment Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8089-5024","authenticated-orcid":false,"given":"Edi","family":"Mu\u0161kardin","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4193-5609","authenticated-orcid":false,"given":"Martin","family":"Tappler","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3484-5584","authenticated-orcid":false,"given":"Bernhard K.","family":"Aichernig","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8420-6377","authenticated-orcid":false,"given":"Ingo","family":"Pill","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,6]]},"reference":[{"key":"14_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1007\/978-3-030-59152-6_16","volume-title":"Automated Technology for Verification and Analysis","author":"A Bork","year":"2020","unstructured":"Bork, A., Junges, S., Katoen, J.-P., Quatmann, T.: Verification of indefinite-horizon POMDPs. In: Hung, D.V., Sokolsky, O. (eds.) ATVA 2020. LNCS, vol. 12302, pp. 288\u2013304. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59152-6_16"},{"key":"14_CR2","unstructured":"Brockman, G., et al.: OpenAI gym. CoRR abs\/1606.01540 (2016)"},{"key":"14_CR3","doi-asserted-by":"publisher","unstructured":"Carr, S., Jansen, N., Junges, S., Topcu, U.: Safe reinforcement learning via shielding under partial observability. In: Williams, B., Chen, Y., Neville, J. (eds.) Thirty-Seventh AAAI Conference on Artificial Intelligence, AAAI 2023, Thirty-Fifth Conference on Innovative Applications of Artificial Intelligence, IAAI 2023, Thirteenth Symposium on Educational Advances in Artificial Intelligence, EAAI 2023, Washington, DC, USA, 7\u201314 February 2023, pp. 14748\u201314756. AAAI Press (2023). https:\/\/doi.org\/10.1609\/aaai.v37i12.26723","DOI":"10.1609\/aaai.v37i12.26723"},{"key":"14_CR4","doi-asserted-by":"publisher","unstructured":"Carr, S., Jansen, N., Topcu, U.: Task-aware verifiable RNN-based policies for partially observable markov decision processes. J. Artif. Intell. Res. 72, 819\u2013847 (2021). https:\/\/doi.org\/10.1613\/jair.1.12963","DOI":"10.1613\/jair.1.12963"},{"key":"14_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1007\/3-540-58473-0_144","volume-title":"Grammatical Inference and Applications","author":"RC Carrasco","year":"1994","unstructured":"Carrasco, R.C., Oncina, J.: Learning stochastic regular grammars by means of a state merging method. In: Carrasco, R.C., Oncina, J. (eds.) ICGI 1994. LNCS, vol. 862, pp. 139\u2013152. Springer, Heidelberg (1994). https:\/\/doi.org\/10.1007\/3-540-58473-0_144"},{"key":"14_CR6","unstructured":"Cassandra, A.R., Kaelbling, L.P., Littman, M.L.: Acting optimally in partially observable stochastic domains. In: AAAI (1994)"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Chatterjee, K., Chmelik, M., Gupta, R., Kanodia, A.: Qualitative analysis of POMDPs with temporal logic specifications for robotics applications. In: 2015 IEEE International Conference on Robotics and Automation (ICRA) (2015)","DOI":"10.1109\/ICRA.2015.7139019"},{"key":"14_CR8","unstructured":"Chrisman, L.: Reinforcement learning with perceptual aliasing: the perceptual distinctions approach. In: AAAI Conference on Artificial Intelligence (AAAI), pp. 183\u2013188 (1992)"},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Furelos-Blanco, D., Law, M., Russo, A., Broda, K., Jonsson, A.: Induction of subgoal automata for reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI) (2020)","DOI":"10.1609\/aaai.v34i04.5802"},{"key":"14_CR10","doi-asserted-by":"crossref","unstructured":"Gaon, M., Brafman, R.I.: Reinforcement learning with non-Markovian rewards. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI) (2020)","DOI":"10.1609\/aaai.v34i04.5814"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Hasanbeig, M., Jeppu, N.Y., Abate, A., Melham, T., Kroening, D.: DeepSynth: automata synthesis for automatic task segmentation in deep reinforcement learning. In: AAAI Conference on Artificial Intelligence (AAAI) (2021)","DOI":"10.1609\/aaai.v35i9.16935"},{"key":"14_CR12","unstructured":"Hausknecht, M.J., Stone, P.: Deep recurrent Q-learning for partially observable MDPs. In: AAAI Conference on Artificial Intelligence (AAAI) (2015)"},{"key":"14_CR13","unstructured":"Hill, A., et al.: Stable baselines. https:\/\/github.com\/hill-a\/stable-baselines (2018)"},{"issue":"301","key":"14_CR14","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1080\/01621459.1963.10500830","volume":"58","author":"W Hoeffding","year":"1963","unstructured":"Hoeffding, W.: Probability inequalities for sums of bounded random variables. J. Am. Stat. Assoc. 58(301), 13\u201330 (1963)","journal-title":"J. Am. Stat. Assoc."},{"key":"14_CR15","unstructured":"Icarte, R.T., Klassen, T.Q., Valenzano, R.A., McIlraith, S.A.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: International Conference on Machine Learning (ICML) (2018)"},{"key":"14_CR16","unstructured":"Icarte, R.T., Waldie, E., Klassen, T.Q., Valenzano, R.A., Castro, M.P., McIlraith, S.A.: Learning reward machines for partially observable reinforcement learning. In: Advances in Neural Information Processing Systems (NeurIPS) (2019)"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Mao, H., Chen, Y., Jaeger, M., Nielsen, T.D., Larsen, K.G., Nielsen, B.: Learning Markov decision processes for model checking. In: Quantities in Formal Methods (QFM) (2012)","DOI":"10.4204\/EPTCS.103.6"},{"issue":"2","key":"14_CR18","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1007\/s10994-016-5565-9","volume":"105","author":"H Mao","year":"2016","unstructured":"Mao, H., Chen, Y., Jaeger, M., Nielsen, T.D., Larsen, K.G., Nielsen, B.: Learning deterministic probabilistic automata from a model checking perspective. Mach. Learn. 105(2), 255\u2013299 (2016). https:\/\/doi.org\/10.1007\/s10994-016-5565-9","journal-title":"Mach. Learn."},{"key":"14_CR19","doi-asserted-by":"crossref","unstructured":"McCallum, A.: Overcoming incomplete perception with utile distinction memory. In: International Conference on Machine Learning (ICML), pp. 190\u2013196 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50031-9"},{"key":"14_CR20","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning (ICML) (2016)"},{"key":"14_CR21","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. CoRR abs\/1312.5602 (2013)"},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Mu\u0161kardin, E., Aichernig, B.K., Pill, I., Pferscher, A., Tappler, M.: AALpy: an active automata learning library. In: 19th International Symposium on Automated Technology for Verification and Analysis (ATVA) (2021)","DOI":"10.1007\/978-3-030-88885-5_5"},{"key":"14_CR23","doi-asserted-by":"crossref","unstructured":"Neider, D., Gaglione, J., Gavran, I., Topcu, U., Wu, B., Xu, Z.: Advice-guided reinforcement learning in a non-Markovian environment. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI) (2021)","DOI":"10.1609\/aaai.v35i10.17096"},{"key":"14_CR24","doi-asserted-by":"publisher","unstructured":"Oncina, J., Garcia, P.: Identifying regular languages in polynomial time. In: Advances in Structural and Syntactic Pattern Recognition. Machine Perception and Artificial Intelligence, vol. 5, pp. 99\u2013108. World Scientific (1992). https:\/\/doi.org\/10.1142\/9789812797919_0007","DOI":"10.1142\/9789812797919_0007"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"Singh, S.P., Jaakkola, T.S., Jordan, M.I.: Learning without state-estimation in partially observable Markovian decision processes. In: International Conference on Machine Learning (ICML), pp. 284\u2013292. Morgan Kaufmann (1994)","DOI":"10.1016\/B978-1-55860-335-6.50042-8"},{"key":"14_CR26","doi-asserted-by":"publisher","unstructured":"Tappler, M., Pranger, S., K\u00f6nighofer, B., Muskardin, E., Bloem, R., Larsen, K.G.: Automata learning meets shielding. In: Margaria, T., Steffen, B. (eds.) Leveraging Applications of Formal Methods, Verification and Validation. Verification Principles \u2013 11th International Symposium, ISoLA 2022, Rhodes, Greece, 22\u201330 October 2022, Proceedings, Part I. Lecture Notes in Computer Science, vol. 13701, pp. 335\u2013359. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-19849-6_20","DOI":"10.1007\/978-3-031-19849-6_20"},{"key":"14_CR27","unstructured":"Velasquez, A., Beckus, A., Dohmen, T., Trivedi, A., Topper, N., Atia, G.K.: Learning probabilistic reward machines from non-Markovian stochastic reward processes. CoRR abs\/2107.04633 (2021)"},{"key":"14_CR28","doi-asserted-by":"crossref","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8(3), 279\u2013292 (1992)","DOI":"10.1007\/BF00992698"},{"key":"14_CR29","unstructured":"Wu, Y., Mansimov, E., Grosse, R.B., Liao, S., Ba, J.: Scalable trust-region method for deep reinforcement learning using Kronecker-factored approximation. In: Advances in Neural Information Processing Systems (NIPS) (2017)"},{"key":"14_CR30","doi-asserted-by":"crossref","unstructured":"Xu, Z., et al.: Joint inference of reward machines and policies for reinforcement learning. In: Proceedings of the International Conference on Automated Planning and Scheduling (ICAPS) (2020)","DOI":"10.1609\/icaps.v30i1.6756"},{"key":"14_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/978-3-030-84060-0_8","volume-title":"Machine Learning and Knowledge Extraction","author":"Z Xu","year":"2021","unstructured":"Xu, Z., Wu, B., Ojha, A., Neider, D., Topcu, U.: Active finite reward automaton inference and reinforcement learning using queries and counterexamples. In: Holzinger, A., Kieseberg, P., Tjoa, A.M., Weippl, E. (eds.) CD-MAKE 2021. LNCS, vol. 12844, pp. 115\u2013135. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-84060-0_8"}],"container-title":["Lecture Notes in Computer Science","Integrated Formal Methods"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-47705-8_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,18]],"date-time":"2024-04-18T14:50:40Z","timestamp":1713451840000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-47705-8_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,6]]},"ISBN":["9783031477041","9783031477058"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-47705-8_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,6]]},"assertion":[{"value":"6 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"iFM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Integrated Formal Methods","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Leiden","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ifm2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/liacs.leidenuniv.nl\/~bonsanguemm\/ifm23\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"51","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"16","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}