{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T17:59:25Z","timestamp":1781027965570,"version":"3.54.1"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031308222","type":"print"},{"value":"9783031308239","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2023,4,22]],"date-time":"2023-04-22T00:00:00Z","timestamp":1682121600000},"content-version":"vor","delay-in-days":111,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>\n                    Mungojerrie is an extensible tool that provides a framework to translate linear-time objectives into reward for reinforcement learning (RL). The tool provides convergent RL algorithms for stochastic games, reference implementations of existing reward translations for\n                    <jats:inline-formula>\n                      <jats:alternatives>\n                        <jats:tex-math>$$\\omega $$<\/jats:tex-math>\n                        <mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\">\n                          <mml:mi>\u03c9<\/mml:mi>\n                        <\/mml:math>\n                      <\/jats:alternatives>\n                    <\/jats:inline-formula>\n                    -regular objectives, and an internal probabilistic model checker for\n                    <jats:inline-formula>\n                      <jats:alternatives>\n                        <jats:tex-math>$$\\omega $$<\/jats:tex-math>\n                        <mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\">\n                          <mml:mi>\u03c9<\/mml:mi>\n                        <\/mml:math>\n                      <\/jats:alternatives>\n                    <\/jats:inline-formula>\n                    -regular objectives. This functionality is modular and operates on shared data structures, which enables fast development of new translation techniques. Mungojerrie supports finite models specified in PRISM and\n                    <jats:inline-formula>\n                      <jats:alternatives>\n                        <jats:tex-math>$$\\omega $$<\/jats:tex-math>\n                        <mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\">\n                          <mml:mi>\u03c9<\/mml:mi>\n                        <\/mml:math>\n                      <\/jats:alternatives>\n                    <\/jats:inline-formula>\n                    -automata specified in the HOA format, with an integrated command line interface to external linear temporal logic translators. Mungojerrie is distributed with a set of benchmarks for\n                    <jats:inline-formula>\n                      <jats:alternatives>\n                        <jats:tex-math>$$\\omega $$<\/jats:tex-math>\n                        <mml:math xmlns:mml=\"http:\/\/www.w3.org\/1998\/Math\/MathML\">\n                          <mml:mi>\u03c9<\/mml:mi>\n                        <\/mml:math>\n                      <\/jats:alternatives>\n                    <\/jats:inline-formula>\n                    -regular objectives in RL.\n                  <\/jats:p>","DOI":"10.1007\/978-3-031-30823-9_27","type":"book-chapter","created":{"date-parts":[[2023,4,21]],"date-time":"2023-04-21T16:19:12Z","timestamp":1682093952000},"page":"527-545","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Mungojerrie: Linear-Time Objectives in Model-Free Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9348-7684","authenticated-orcid":false,"given":"Ernst Moritz","family":"Hahn","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4220-3212","authenticated-orcid":false,"given":"Mateo","family":"Perez","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9093-9518","authenticated-orcid":false,"given":"Sven","family":"Schewe","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2085-2003","authenticated-orcid":false,"given":"Fabio","family":"Somenzi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9346-0126","authenticated-orcid":false,"given":"Ashutosh","family":"Trivedi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5560-0546","authenticated-orcid":false,"given":"Dominik","family":"Wojtczak","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2023,4,22]]},"reference":[{"key":"27_CR1","unstructured":"de\u00a0Alfaro, L.: Formal Verification of Probabilistic Systems. Ph.D. thesis, Stanford University (1998)"},{"key":"27_CR2","doi-asserted-by":"crossref","unstructured":"Babiak, T., Blahoudek, F., Duret-Lutz, A., Klein, J., K\u0159et\u00ednsk\u00fd, J., M\u00fcller, D., Parker, D., Strej\u010dek, J.: The Hanoi omega-automata format. In: Computer Aided Verification (CAV). pp. 479\u2013486 (2015), LNCS 9206","DOI":"10.1007\/978-3-319-21690-4_31"},{"key":"27_CR3","doi-asserted-by":"publisher","unstructured":"Bozkurt, A.K., Wang, Y., Zavlanos, M.M., Pajic, M.: Control synthesis from linear temporal logic specifications using model-free reinforcement learning. In: 2020 IEEE International Conference on Robotics and Automation (ICRA). pp. 10349\u201310355 (2020). https:\/\/doi.org\/10.1109\/ICRA40945.2020.9196796","DOI":"10.1109\/ICRA40945.2020.9196796"},{"key":"27_CR4","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., Zaremba, W.: OpenAI Gym. CoRR abs\/1606.01540 (2016)"},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Condon, A.: The complexity of stochastic games. Inf. Comput. 96(2), 203\u2013224 (1992)","DOI":"10.1016\/0890-5401(92)90048-K"},{"key":"27_CR6","doi-asserted-by":"publisher","unstructured":"Dole, K., Gupta, A., Komp, J., Krishna, S.N., Trivedi, A.: Event-triggered and time-triggered duration calculus for model-free reinforcement learning. In: 42nd IEEE Real-Time Systems Symposium, RTSS 2021, Dortmund, Germany, December 7-10, 2021. pp. 240\u2013252. IEEE (2021). https:\/\/doi.org\/10.1109\/RTSS52674.2021.00031, https:\/\/doi.org\/10.1109\/RTSS52674.2021.00031","DOI":"10.1109\/RTSS52674.2021.00031"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Duret-Lutz, A., Lewkowicz, A., Fauchille, A., Michaud, T., Renault, E., Xu, L.: Spot 2.0 \u2014 a framework for LTL and $$\\omega $$-automata manipulation. In: Proceedings of the 14th International Symposium on Automated Technology for Verification and Analysis (ATVA\u201916). Lecture Notes in Computer Science, vol.\u00a09938, pp. 122\u2013129. Springer (Oct 2016)","DOI":"10.1007\/978-3-319-46520-3_8"},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Ellson, J., Gansner, E.R., Koutsofios, E., North, S.C., Woodhull, G.: Graphviz and dynagraph - static and dynamic graph drawing tools. In: J\u00fcnger, M., Mutzel, P. (eds.) Graph Drawing Software, pp. 127\u2013148. Springer (2004)","DOI":"10.1007\/978-3-642-18638-7_6"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Etessami, K., Wilke, T., Schuller, A.: Fair simulation relations, parity games, and state space reduction for B\u00fcchi automata. In: Orejas, F., Spirakis, P.G., van Leeuwen, J. (eds.) Automata, Languages and Programming: 28th International Colloquium. pp. 694\u2013707. Springer, Crete, Greece (Jul 2001), lNCS 2076","DOI":"10.1007\/3-540-48224-5_57"},{"key":"27_CR10","unstructured":"Hahn, E.M., Li, G., Schewe, S., Turrini, A., Zhang, L.: Lazy probabilistic model checking without determinisation. In: Concurrency Theory, (CONCUR). pp. 354\u2013367 (2015)"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Omega-regular objectives in model-free reinforcement learning. In: Tools and Algorithms for the Construction and Analysis of Systems. pp. 395\u2013412 (2019), LNCS 11427","DOI":"10.1007\/978-3-030-17462-0_27"},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Faithful and effective reward schemes for model-free reinforcement learning of omega-regular objectives. In: ATVA: Automated Technology for Verification and Analysis. pp. 108\u2013124 (2020), LNCS 12302","DOI":"10.1007\/978-3-030-59152-6_6"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Good-for-MDPs automata for probabilistic analysis and reinforcement learning. In: Tools and Algorithms for the Construction and Analysis of Systems. pp. 306\u2013323 (2020), LNCS 12078","DOI":"10.1007\/978-3-030-45190-5_17"},{"key":"27_CR14","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Model-free reinforcement learning for stochastic parity games. In: CONCUR: International Conference on Concurrency Theory. pp. 21:1\u201321:16 (Sep 2020), LIPIcs 171"},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Schewe, S., Turrini, A., Zhang, L.: A simple algorithm for solving qualitative probabilistic parity games. In: Computer Aided Verification. pp. 291\u2013311. Part II (2016), LNCS 9780","DOI":"10.1007\/978-3-319-41540-6_16"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Schewe, S., Turrini, A., Zhang, L.: Synthesising strategy improvement and recursive algorithms for solving 2.5 player parity games. In: Verification, Model Checking, and Abstract Interpretation. pp. 266\u2013287 (2017)","DOI":"10.1007\/978-3-319-52234-0_15"},{"key":"27_CR17","doi-asserted-by":"crossref","unstructured":"Hahn, E., Li, Y., Schewe, S., Turrini, A., Zhang, L.: iscasMc: A web-based probabilistic model checker. In: International Symposium on Formal Methods. pp. 312\u2013317 (May 2014)","DOI":"10.1007\/978-3-319-06410-9_22"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Model-free reinforcement learning for branching markov decision processes. In: Silva, A., Leino, K.R.M. (eds.) Computer Aided Verification. pp. 651\u2013673. Springer International Publishing, Cham (2021)","DOI":"10.1007\/978-3-030-81688-9_30"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Model-free reinforcement learning for lexicographic omega-regular objectives. In: Formal Methods - 24th International Symposium. pp. 142\u2013159. LNCS 13047 (2021)","DOI":"10.1007\/978-3-030-90870-6_8"},{"key":"27_CR20","unstructured":"van Hasselt, H.: Double $$Q$$-learning. In: Advances in Neural Information Processing Systems. pp. 2613\u20132621 (2010)"},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Henzinger, T.A., Piterman, N.: Solving games without determinization. In: 15th Conference on Computer Science Logic. pp. 394\u2013409. Szeged, Hungary (Sep 2006), LNCS 4207","DOI":"10.1007\/11874683_26"},{"key":"27_CR22","unstructured":"Irpan, A.: Deep reinforcement learning doesn\u2019t work yet. https:\/\/www.alexirpan.com\/2018\/02\/14\/rl-hard.html (2018)"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Kazemi, M., Perez, M., Somenzi, F., Soudjani, S., Trivedi, A., Velasquez, A.: Translating omega-regular specifications to average objectives for model-free reinforcement learning. In: Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems. pp. 732\u2013741 (2022)","DOI":"10.65109\/XTSP7865"},{"key":"27_CR24","doi-asserted-by":"crossref","unstructured":"K\u0159et\u00ednsk\u00fd, J., Meggendorfer, T., Sickert, S.: Owl: A library for $$\\omega $$-words, automata, and LTL. In: Automated Technology for Verification and Analysis, ATVA. pp. 543\u2013550 (2018), LNCS 11138","DOI":"10.1007\/978-3-030-01090-4_34"},{"key":"27_CR25","doi-asserted-by":"crossref","unstructured":"Kwiatkowska, M., Norman, G., Parker, D.: PRISM 4.0: Verification of probabilistic real-time systems. In: Computer Aided Verification (CAV). pp. 585\u2013591 (Jul 2011), LNCS 6806","DOI":"10.1007\/978-3-642-22110-1_47"},{"key":"27_CR26","doi-asserted-by":"publisher","unstructured":"Lavaei, A., Somenzi, F., Soudjani, S., Trivedi, A., Zamani, M.: Formal controller synthesis for continuous-space mdps via model-free reinforcement learning. In: 11th ACM\/IEEE International Conference on Cyber-Physical Systems, ICCPS 2020, Sydney, Australia, April 21-25, 2020. pp. 98\u2013107. IEEE (2020). https:\/\/doi.org\/10.1109\/ICCPS48487.2020.00017","DOI":"10.1109\/ICCPS48487.2020.00017"},{"key":"27_CR27","doi-asserted-by":"crossref","unstructured":"Manna, Z., Pnueli, A.: The Temporal Logic of Reactive and Concurrent Systems *Specification*. Springer (1991)","DOI":"10.1007\/978-1-4612-0931-7"},{"key":"27_CR28","doi-asserted-by":"crossref","unstructured":"McNaughton, R.: Testing and generating infinite sequences by a finite automaton. Inf. Control. 9(5), 521\u2013530 (1966)","DOI":"10.1016\/S0019-9958(66)80013-X"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., et\u00a0al.: Human-level control through deep reinforcement learning. Nature 518 (2015)","DOI":"10.1038\/nature14236"},{"key":"27_CR30","unstructured":"Naik, A., Shariff, R., Yasui, N., Sutton, R.S.: Discounted reinforcement learning is not an optimization problem. CoRR abs\/1910.02140 (2019), http:\/\/arxiv.org\/abs\/1910.02140"},{"key":"27_CR31","unstructured":"OpenAI, Akkaya, I., Andrychowicz, M., Chociej, M., Litwin, M., McGrew, B., Petron, A., Paino, A., Plappert, M., Powell, G., Ribas, R., Schneider, J., Tezak, N., Tworek, J., Welinder, P., Weng, L., Yuan, Q., Zaremba, W., Zhang, L.: Solving rubik\u2019s cube with a robot hand. arXiv preprint (2019)"},{"key":"27_CR32","unstructured":"Perrin, D., Pin, J.\u00c9.: Infinite Words: Automata, Semigroups, Logic and Games. Elsevier (2004)"},{"key":"27_CR33","doi-asserted-by":"crossref","unstructured":"Pnueli, A.: The temporal semantics of concurrent programs. Theoret. Comput. Science 13, 45\u201360 (1981)","DOI":"10.1016\/0304-3975(81)90110-9"},{"key":"27_CR34","doi-asserted-by":"crossref","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley & Sons, Inc., New York, NY, USA (1994)","DOI":"10.1002\/9780470316887"},{"key":"27_CR35","doi-asserted-by":"crossref","unstructured":"Sadigh, D., Kim, E., Coogan, S., Sastry, S.S., Seshia, S.A.: A learning based approach to control synthesis of Markov decision processes for linear temporal logic specifications. In: IEEE Conference on Decision and Control (CDC). pp. 1091\u20131096 (Dec 2014)","DOI":"10.1109\/CDC.2014.7039527"},{"key":"27_CR36","unstructured":"Schewe, S., Tang, Q., Zhanabekova, T.: Deciding what is good-for-mdps. CoRR abs\/2202.07629 (2022), https:\/\/arxiv.org\/abs\/2202.07629"},{"key":"27_CR37","doi-asserted-by":"crossref","unstructured":"Sickert, S., Esparza, J., Jaax, S., K\u0159et\u00ednsk\u00fd, J.: Limit-deterministic B\u00fcchi automata for linear temporal logic. In: Computer Aided Verification (CAV). pp. 312\u2013332 (2016), LNCS 9780","DOI":"10.1007\/978-3-319-41540-6_17"},{"key":"27_CR38","doi-asserted-by":"crossref","unstructured":"Silver, D., et\u00a0al.: Mastering the game of Go with deep neural networks and tree search. Nature 529, 484\u2013489 (Jan 2016)","DOI":"10.1038\/nature16961"},{"key":"27_CR39","unstructured":"Simovec, P.: Transformation of nondeterministic b\u00fcchi automata to slim automata (2021), https:\/\/is.muni.cz\/th\/nd15g\/"},{"key":"27_CR40","doi-asserted-by":"crossref","unstructured":"Sutton, R.S.: Learning to predict by the method of temporal differences. Machine Learning 3, 9\u201344 (1998)","DOI":"10.1023\/A:1022633531479"},{"key":"27_CR41","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, second edn. (2018)"},{"key":"27_CR42","unstructured":"Wan, Y., Naik, A., Sutton, R.S.: Learning and planning in average-reward markov decision processes. In: International Conference on Machine Learning. pp. 10653\u201310662. PMLR (2021)"},{"key":"27_CR43","doi-asserted-by":"crossref","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. In: Machine Learning. pp. 279\u2013292 (1992)","DOI":"10.1023\/A:1022676722315"},{"key":"27_CR44","doi-asserted-by":"crossref","unstructured":"Wiewiora, E.: Reward shaping. In: Encyclopedia of Machine Learning, pp. 863\u2013865. Springer (2010)","DOI":"10.1007\/978-0-387-30164-8_731"},{"key":"27_CR45","doi-asserted-by":"crossref","unstructured":"Yang, C., Littman, M., Carbin, M.: Reinforcement learning for general ltl objectives is intractable. arXiv preprint arXiv:2111.12679 (2021)","DOI":"10.24963\/ijcai.2022\/507"}],"container-title":["Lecture Notes in Computer Science","Tools and Algorithms for the Construction and Analysis of Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-30823-9_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T17:07:35Z","timestamp":1781024855000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-30823-9_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031308222","9783031308239"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-30823-9_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 April 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TACAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Tools and Algorithms for the Construction and Analysis of Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Paris","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 April 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 April 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tacas2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/etaps.org\/2023\/tacas","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}