{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,24]],"date-time":"2025-10-24T08:26:33Z","timestamp":1761294393051,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030908690"},{"type":"electronic","value":"9783030908706"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-90870-6_8","type":"book-chapter","created":{"date-parts":[[2021,11,10]],"date-time":"2021-11-10T00:06:44Z","timestamp":1636502804000},"page":"142-159","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Model-Free Reinforcement Learning for Lexicographic Omega-Regular Objectives"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9348-7684","authenticated-orcid":false,"given":"Ernst Moritz","family":"Hahn","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4220-3212","authenticated-orcid":false,"given":"Mateo","family":"Perez","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9093-9518","authenticated-orcid":false,"given":"Sven","family":"Schewe","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2085-2003","authenticated-orcid":false,"given":"Fabio","family":"Somenzi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9346-0126","authenticated-orcid":false,"given":"Ashutosh","family":"Trivedi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5560-0546","authenticated-orcid":false,"given":"Dominik","family":"Wojtczak","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,11,10]]},"reference":[{"key":"8_CR1","unstructured":"de Alfaro, L.: Formal verification of probabilistic systems. Ph.D. thesis, Stanford University (1998)"},{"key":"8_CR2","volume-title":"Principles of Model Checking","author":"C Baier","year":"2008","unstructured":"Baier, C., Katoen, J.P.: Principles of Model Checking. MIT Press, Cambridge (2008)"},{"key":"8_CR3","doi-asserted-by":"crossref","unstructured":"Bozkurt, A.K., Wang, Y., Pajic, M.: Model-free learning of safe yet effective controllers. arXiv preprint arXiv:2103.14600 (2021)","DOI":"10.1109\/CDC45484.2021.9683634"},{"key":"8_CR4","doi-asserted-by":"publisher","unstructured":"Bozkurt, A.K., Wang, Y., Zavlanos, M.M., Pajic, M.: Control synthesis from linear temporal logic specifications using model-free reinforcement learning. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 10349\u201310355 (2020). https:\/\/doi.org\/10.1109\/ICRA40945.2020.9196796","DOI":"10.1109\/ICRA40945.2020.9196796"},{"key":"8_CR5","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1007\/978-3-540-77050-3_39","volume-title":"FSTTCS 2007: Foundations of Software Technology and Theoretical Computer Science","author":"K Chatterjee","year":"2007","unstructured":"Chatterjee, K.: Markov decision processes with multiple long-run average objectives. In: Arvind, V., Prasad, S. (eds.) FSTTCS 2007: Foundations of Software Technology and Theoretical Computer Science, pp. 473\u2013484. Springer, Heidelberg (2007)"},{"key":"8_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1007\/11672142_26","volume-title":"STACS 2006","author":"K Chatterjee","year":"2006","unstructured":"Chatterjee, K., Majumdar, R., Henzinger, T.A.: Markov decision processes with multiple objectives. In: Durand, B., Thomas, W. (eds.) STACS 2006. LNCS, vol. 3884, pp. 325\u2013336. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11672142_26"},{"key":"8_CR7","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1007\/978-3-540-71209-1_6","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"K Etessami","year":"2007","unstructured":"Etessami, K., Kwiatkowska, M., Vardi, M.Y., Yannakakis, M.: Multi-objective model checking of Markov decision processes. In: Grumberg, O., Huth, M. (eds.) Tools and Algorithms for the Construction and Analysis of Systems, pp. 50\u201365. Springer, Heidelberg (2007)"},{"key":"8_CR8","doi-asserted-by":"publisher","unstructured":"Feng, L., Wiltsche, C., Humphrey, L.R., Topcu, U.: Controller synthesis for autonomous systems interacting with human operators. In: Bayen, A.M., Branicky, M.S. (eds.) Proceedings of the ACM\/IEEE Sixth International Conference on Cyber-Physical Systems, ICCPS 2015, Seattle, WA, USA, 14\u201316, April, 2015, pp. 70\u201379. ACM (2015). https:\/\/doi.org\/10.1145\/2735960.2735973","DOI":"10.1145\/2735960.2735973"},{"key":"8_CR9","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1007\/978-3-642-19835-9_11","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"V Forejt","year":"2011","unstructured":"Forejt, V., Kwiatkowska, M., Norman, G., Parker, D., Qu, H.: Quantitative multi-objective verification for probabilistic systems. In: Abdulla, P.A., Leino, K.R.M. (eds.) Tools and Algorithms for the Construction and Analysis of Systems, pp. 112\u2013127. Springer, Heidelberg (2011)"},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Fu, J., Topcu, U.: Probably approximately correct MDP learning and control with temporal logic constraints. In: Robotics: Science and Systems, July 2014","DOI":"10.15607\/RSS.2014.X.039"},{"key":"8_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/978-3-030-17462-0_27","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"EM Hahn","year":"2019","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Omega-regular objectives in model-free reinforcement learning. In: Vojnar, T., Zhang, L. (eds.) TACAS 2019. LNCS, vol. 11427, pp. 395\u2013412. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-17462-0_27"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Good-for-mdps automata for probabilistic analysis and reinforcement learning. In: Tools and Algorithms for the Construction and Analysis of Systems (2020)","DOI":"10.1007\/978-3-030-45190-5_17"},{"key":"8_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1007\/978-3-030-59152-6_6","volume-title":"Automated Technology for Verification and Analysis","author":"EM Hahn","year":"2020","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Faithful and effective reward schemes for model-free reinforcement learning of omega-regular objectives. In: Hung, D.V., Sokolsky, O. (eds.) ATVA 2020. LNCS, vol. 12302, pp. 108\u2013124. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59152-6_6"},{"key":"8_CR14","unstructured":"Hasanbeig, M., Abate, A., Kroening, D.: Logically-correct reinforcement learning. CoRR abs\/1801.08099 (2018). http:\/\/arxiv.org\/abs\/1801.08099"},{"key":"8_CR15","unstructured":"Hasanbeig, M., Abate, A., Kroening, D.: Certified reinforcement learning with logic guidance. arXiv:1902.00778 (2019)"},{"key":"8_CR16","doi-asserted-by":"publisher","unstructured":"Kret\u00ednsk\u00fd, J., P\u00e9rez, G.A., Raskin, J.: Learning-based mean-payoff optimization in an unknown MDP under omega-regular constraints. In: Schewe, S., Zhang, L. (eds.) 29th International Conference on Concurrency Theory, CONCUR 2018, 4\u20137, September, 2018, Beijing, China, vol. 118, pp. 8:1\u20138:18. LIPIcs, Schloss Dagstuhl - Leibniz-Zentrum f\u00fcr Informatik (2018). https:\/\/doi.org\/10.4230\/LIPIcs.CONCUR.2018.8","DOI":"10.4230\/LIPIcs.CONCUR.2018.8"},{"key":"8_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1007\/978-3-642-22110-1_47","volume-title":"Computer Aided Verification","author":"M Kwiatkowska","year":"2011","unstructured":"Kwiatkowska, M., Norman, G., Parker, D.: PRISM 4.0: verification of probabilistic real-time systems. In: Gopalakrishnan, G., Qadeer, S. (eds.) CAV 2011. LNCS, vol. 6806, pp. 585\u2013591. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-22110-1_47"},{"issue":"12\u201313","key":"8_CR18","doi-asserted-by":"publisher","first-page":"1272","DOI":"10.1016\/j.tcs.2008.12.058","volume":"410","author":"M Kwiatkowska","year":"2009","unstructured":"Kwiatkowska, M., Norman, G., Parker, D., Vigliotti, M.: Probabilistic mobile ambients. Theoretical Computer Science 410(12\u201313), 1272\u20131303 (2009)","journal-title":"Theoretical Computer Science"},{"issue":"7540","key":"8_CR19","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"8_CR20","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, Hoboken (1994)"},{"key":"8_CR21","doi-asserted-by":"crossref","unstructured":"Sadigh, D., Kim, E., Coogan, S., Sastry, S.S., Seshia, S.A.: A learning based approach to control synthesis of Markov decision processes for linear temporal logic specifications. In: CDC, pp. 1091\u20131096, December 2014","DOI":"10.21236\/ADA623517"},{"key":"8_CR22","doi-asserted-by":"crossref","unstructured":"Strehl, A.L., Li, L., Wiewiora, E., Langford, J., Littman, M.L.: PAC model-free reinforcement learning. In: International Conference on Machine Learning, ICM, pp. 881\u2013888 (2006)","DOI":"10.1145\/1143844.1143955"},{"key":"8_CR23","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, 2nd edn. MIT Press, Cambridge (2018)","edition":"2"},{"issue":"3\u20134","key":"8_CR24","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."}],"container-title":["Lecture Notes in Computer Science","Formal Methods"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-90870-6_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,14]],"date-time":"2023-01-14T20:11:55Z","timestamp":1673727115000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-90870-6_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030908690","9783030908706"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-90870-6_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"10 November 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"FM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Formal Methods","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 November 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"fm2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/formalmethods2021.csp.escience.cn\/dct\/page\/1","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"131","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"31% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Additionally, this includes 4 invited full papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}