{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T23:15:27Z","timestamp":1769814927139,"version":"3.49.0"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030174613","type":"print"},{"value":"9783030174620","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-17462-0_27","type":"book-chapter","created":{"date-parts":[[2019,4,4]],"date-time":"2019-04-04T01:49:28Z","timestamp":1554342568000},"page":"395-412","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":65,"title":["Omega-Regular Objectives in Model-Free Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Ernst Moritz","family":"Hahn","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mateo","family":"Perez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sven","family":"Schewe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fabio","family":"Somenzi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ashutosh","family":"Trivedi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dominik","family":"Wojtczak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,4,4]]},"reference":[{"key":"27_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1007\/978-3-319-21690-4_31","volume-title":"Computer Aided Verification","author":"T Babiak","year":"2015","unstructured":"Babiak, T., et al.: The Hanoi omega-automata format. In: Kroening, D., P\u0103s\u0103reanu, C.S. (eds.) CAV 2015. LNCS, vol. 9206, pp. 479\u2013486. Springer, Cham (2015). \n                      https:\/\/doi.org\/10.1007\/978-3-319-21690-4_31"},{"key":"27_CR2","volume-title":"Principles of Model Checking","author":"C Baier","year":"2008","unstructured":"Baier, C., Katoen, J.-P.: Principles of Model Checking. MIT Press, Cambridge (2008)"},{"key":"27_CR3","volume-title":"Neuro-Dynamic Programming","author":"DP Bertsekas","year":"1996","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"27_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/978-3-319-11936-6_8","volume-title":"Automated Technology for Verification and Analysis","author":"T Br\u00e1zdil","year":"2014","unstructured":"Br\u00e1zdil, T., et al.: Verification of Markov decision processes using learning algorithms. In: Cassez, F., Raskin, J.-F. (eds.) ATVA 2014. LNCS, vol. 8837, pp. 98\u2013114. Springer, Cham (2014). \n                      https:\/\/doi.org\/10.1007\/978-3-319-11936-6_8"},{"key":"27_CR5","unstructured":"Brockman, G., et al.: OpenAI Gym. CoRR, abs\/1606.01540 (2016)"},{"key":"27_CR6","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1051\/ita:1999129","volume":"33","author":"O Carton","year":"1999","unstructured":"Carton, O., Maceiras, R.: Computing the Rabin index of a parity automaton. Theoret. Inf. Appl. 33, 495\u2013505 (1999)","journal-title":"Theoret. Inf. Appl."},{"key":"27_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1007\/978-3-642-39799-8_37","volume-title":"Computer Aided Verification","author":"K Chatterjee","year":"2013","unstructured":"Chatterjee, K., Gaiser, A., K\u0159et\u00ednsk\u00fd, J.: Automata with generalized Rabin pairs for probabilistic model checking and LTL synthesis. In: Sharygina, N., Veith, H. (eds.) CAV 2013. LNCS, vol. 8044, pp. 559\u2013575. Springer, Heidelberg (2013). \n                      https:\/\/doi.org\/10.1007\/978-3-642-39799-8_37"},{"issue":"4","key":"27_CR8","doi-asserted-by":"publisher","first-page":"857","DOI":"10.1145\/210332.210339","volume":"42","author":"C Courcoubetis","year":"1995","unstructured":"Courcoubetis, C., Yannakakis, M.: The complexity of probabilistic verification. J. ACM 42(4), 857\u2013907 (1995)","journal-title":"J. ACM"},{"key":"27_CR9","unstructured":"cpphoafparser (2016). \n                      https:\/\/automata.tools\/hoa\/cpphoafparser\n                      \n                    . Accessesd 05 Sept 2018"},{"key":"27_CR10","unstructured":"de Alfaro, L.: Formal Verification of Probabilistic Systems. Ph.D. thesis, Stanford University (1998)"},{"key":"27_CR11","volume-title":"Old Possum\u2019s Book of Practical Cats","author":"TS Eliot","year":"1939","unstructured":"Eliot, T.S.: Old Possum\u2019s Book of Practical Cats. Harcourt Brace Jovanovich, San Diego (1939)"},{"key":"27_CR12","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4615-0805-2","volume-title":"Handbook of Markov Decision Processes","year":"2002","unstructured":"Feinberg, E.A., Shwartz, A. (eds.): Handbook of Markov Decision Processes. Springer, New York (2002). \n                      https:\/\/doi.org\/10.1007\/978-1-4615-0805-2"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Fu, J., Topcu, U.: Probably approximately correct MDP learning and control with temporal logic constraints. In: Robotics: Science and Systems, July 2014","DOI":"10.15607\/RSS.2014.X.039"},{"key":"27_CR14","unstructured":"Guez, A., et al.: An investigation of model-free planning. CoRR, abs\/1901.03559 (2019)"},{"key":"27_CR15","unstructured":"Hahn, E.M., Li, G., Schewe, S., Turrini, A., Zhang, L.: Lazy probabilistic model checking without determinisation. In: Concurrency Theory (CONCUR), pp. 354\u2013367 (2015)"},{"key":"27_CR16","unstructured":"Hasanbeig, M., Abate, A., Kroening, D.: Logically-correct reinforcement learning. CoRR, abs\/1801.08099v1, January 2018"},{"key":"27_CR17","unstructured":"Hasanbeig, M., Abate, A., Kroening, D.: Certified reinforcement learning with logic guidance. arXiv e-prints, \n                      arXiv:1902.00778\n                      \n                    , February 2019"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Hiromoto, M., Ushio, T.: Learning an optimal control policy for a Markov decision process under linear temporal logic specifications. In: Symposium Series on Computational Intelligence, pp. 548\u2013555, December 2015","DOI":"10.1109\/SSCI.2015.87"},{"key":"27_CR19","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1007\/978-1-4615-0805-2_8","volume-title":"Handbook of Markov Decision Processes: Methods and Applications","author":"A Hordijk","year":"2002","unstructured":"Hordijk, A., Yushkevich, A.A.: Blackwell optimality. In: Feinberg, E.A., Shwartz, A. (eds.) Handbook of Markov Decision Processes: Methods and Applications, pp. 231\u2013267. Springer, Boston (2002). \n                      https:\/\/doi.org\/10.1007\/978-1-4615-0805-2_8"},{"key":"27_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/3-540-60045-0_55","volume-title":"Computer Aided Verification","author":"SC Krishnan","year":"1995","unstructured":"Krishnan, S.C., Puri, A., Brayton, R.K., Varaiya, P.P.: The Rabin index and chain automata, with applications to automata and games. In: Wolper, P. (ed.) CAV 1995. LNCS, vol. 939, pp. 253\u2013266. Springer, Heidelberg (1995). \n                      https:\/\/doi.org\/10.1007\/3-540-60045-0_55"},{"key":"27_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1007\/978-3-642-22110-1_47","volume-title":"Computer Aided Verification","author":"M Kwiatkowska","year":"2011","unstructured":"Kwiatkowska, M., Norman, G., Parker, D.: PRISM 4.0: verification of probabilistic real-time systems. In: Gopalakrishnan, G., Qadeer, S. (eds.) CAV 2011. LNCS, vol. 6806, pp. 585\u2013591. Springer, Heidelberg (2011). \n                      https:\/\/doi.org\/10.1007\/978-3-642-22110-1_47"},{"issue":"2","key":"27_CR22","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1109\/TRO.2011.2172150","volume":"28","author":"M Lahijanian","year":"2012","unstructured":"Lahijanian, M., Andersson, S.B., Belta, C.: Temporal logic motion planning and control with probabilistic satisfaction guarantees. IEEE Trans. Robot. 28(2), 396\u2013409 (2012)","journal-title":"IEEE Trans. Robot."},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Li, X., Vasile, C.I., Belta, C.: Reinforcement learning with temporal logic rewards. In: International Conference on Intelligent Robots and Systesm (IROS), pp. 3834\u20133839 (2017)","DOI":"10.1109\/IROS.2017.8206234"},{"key":"27_CR24","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-0931-7","volume-title":"The Temporal Logic of Reactive and Concurrent Systems *Specification*","author":"Z Manna","year":"1991","unstructured":"Manna, Z., Pnueli, A.: The Temporal Logic of Reactive and Concurrent Systems *Specification*. Springer, New York (1991). \n                      https:\/\/doi.org\/10.1007\/978-1-4612-0931-7"},{"key":"27_CR25","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"key":"27_CR26","unstructured":"Mungojerrie\n                      \n                        \n                      \n                      $$\\omega $$\n                    -regular reinforcement learning benchmarks (2019). \n                      https:\/\/plv.colorado.edu\/omega-regular-rl-benchmarks-2019"},{"key":"27_CR27","unstructured":"OpenAI Gym (2018). \n                      https:\/\/gym.openai.com\n                      \n                    . Accessed 05 Sept 2018"},{"key":"27_CR28","volume-title":"Infinite Words: Automata, Semigroups, Logic and Games","author":"D Perrin","year":"2004","unstructured":"Perrin, D., Pin, J.\u00c9.: Infinite Words: Automata, Semigroups, Logic and Games. Elsevier, Amsterdam (2004)"},{"key":"27_CR29","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/BF01843570","volume":"1","author":"A Pnueli","year":"1986","unstructured":"Pnueli, A., Zuck, L.: Verification of multiprocess probabilistic protocols. Distrib. Comput. 1, 53\u201372 (1986)","journal-title":"Distrib. Comput."},{"key":"27_CR30","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, New York (1994)"},{"key":"27_CR31","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine Learning: ECML 2005","author":"M Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural fitted Q iteration \u2013 first experiences with a data efficient neural reinforcement learning method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol. 3720, pp. 317\u2013328. Springer, Heidelberg (2005). \n                      https:\/\/doi.org\/10.1007\/11564096_32"},{"key":"27_CR32","doi-asserted-by":"crossref","unstructured":"Sadigh, D., Kim, E., Coogan, S., Sastry, S.S., Seshia, S.A.: A learning based approach to control synthesis of Markov decision processes for linear temporal logic specifications. In: IEEE Conference on Decision and Control (CDC), pp. 1091\u20131096, December 2014","DOI":"10.21236\/ADA623517"},{"key":"27_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1007\/978-3-319-41540-6_17","volume-title":"Computer Aided Verification","author":"S Sickert","year":"2016","unstructured":"Sickert, S., Esparza, J., Jaax, S., K\u0159et\u00ednsk\u00fd, J.: Limit-deterministic B\u00fcchi automata for linear temporal logic. In: Chaudhuri, S., Farzan, A. (eds.) CAV 2016. LNCS, vol. 9780, pp. 312\u2013332. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-41540-6_17"},{"key":"27_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1007\/978-3-319-46520-3_9","volume-title":"Automated Technology for Verification and Analysis","author":"S Sickert","year":"2016","unstructured":"Sickert, S., K\u0159et\u00ednsk\u00fd, J.: MoChiBA: probabilistic LTL model checking using limit-deterministic B\u00fcchi automata. In: Artho, C., Legay, A., Peled, D. (eds.) ATVA 2016. LNCS, vol. 9938, pp. 130\u2013137. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46520-3_9"},{"key":"27_CR35","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529, 484\u2013489 (2016)","journal-title":"Nature"},{"key":"27_CR36","doi-asserted-by":"crossref","unstructured":"Strehl, A.L., Li, L., Wiewiora, E., Langford, J., Littman, M.L.: PAC model-free reinforcement learning. In: International Conference on Machine Learning ICML, pp. 881\u2013888 (2006)","DOI":"10.1145\/1143844.1143955"},{"key":"27_CR37","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, 2nd edn. MIT Press, Cambridge (2018)","edition":"2"},{"key":"27_CR38","first-page":"133","volume-title":"Formal Models and Semantics","author":"Wolfgang THOMAS","year":"1990","unstructured":"Thomas, W.: Automata on infinite objects. In: Handbook of Theoretical Computer Science, pp. 133\u2013191. The MIT Press\/Elsevier, Cambridge (1990)"},{"key":"27_CR39","doi-asserted-by":"crossref","unstructured":"Vardi, M.Y.: Automatic verification of probabilistic concurrent finite state programs. In: Foundations of Computer Science, pp. 327\u2013338 (1985)","DOI":"10.1109\/SFCS.1985.12"},{"key":"27_CR40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27645-3","volume-title":"Reinforcement Learning: State of the Art","year":"2012","unstructured":"Wiering, M., van Otterlo, M. (eds.): Reinforcement Learning: State of the Art. Springer, Heidelberg (2012). \n                      https:\/\/doi.org\/10.1007\/978-3-642-27645-3"}],"container-title":["Lecture Notes in Computer Science","Tools and Algorithms for the Construction and Analysis of Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-17462-0_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,2]],"date-time":"2019-10-02T12:08:13Z","timestamp":1570018093000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-17462-0_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030174613","9783030174620"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-17462-0_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"4 April 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TACAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Tools and Algorithms for the Construction and Analysis of Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Prague","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 April 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 April 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tacas2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.etaps.org\/2019\/tacas","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"164","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"42","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"8","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"13","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"12 full papers and 11 short papers accepted for TOOLympics and SV-COMP (avg. 4 reviewers\/paper, selected from 43 submissions)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}