{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T16:40:15Z","timestamp":1781973615311,"version":"3.54.5"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030591519","type":"print"},{"value":"9783030591526","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-59152-6_6","type":"book-chapter","created":{"date-parts":[[2020,10,11]],"date-time":"2020-10-11T23:02:35Z","timestamp":1602457355000},"page":"108-124","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Faithful and Effective Reward Schemes for Model-Free Reinforcement Learning of Omega-Regular Objectives"],"prefix":"10.1007","author":[{"given":"Ernst 
Moritz","family":"Hahn","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mateo","family":"Perez","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sven","family":"Schewe","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fabio","family":"Somenzi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ashutosh","family":"Trivedi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dominik","family":"Wojtczak","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,10,12]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Andersson, D., Miltersen, P.B.: The complexity of solving stochastic games on graphs. In: Algorithms and Computation, pp. 112\u2013121 (2009)","DOI":"10.1007\/978-3-642-10631-6_13"},{"key":"6_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1007\/978-3-319-21690-4_31","volume-title":"Computer Aided Verification","author":"T Babiak","year":"2015","unstructured":"Babiak, T., et al.: The hanoi omega-automata format. In: Kroening, D., P\u0103s\u0103reanu, C.S. (eds.) CAV 2015. LNCS, vol. 9206, pp. 479\u2013486. Springer, Cham (2015). \nhttps:\/\/doi.org\/10.1007\/978-3-319-21690-4_31"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Bozkurt, A.K., Wang, Y., Zavlanos, M.M., Pajic, M.: Control synthesis from linear temporal logic specifications using model-free reinforcement learning. CoRR, abs\/1909.07299 (2019)","DOI":"10.1109\/ICRA40945.2020.9196796"},{"key":"6_CR4","unstructured":"Brockman, G., et al.: OpenAI Gym. 
CoRR, abs\/1606.01540 (2016)"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Camacho, A., Toro Icarte, R., Klassen, T.Q., Valenzano, R., McIlraith, S.A.: LTL and beyond: formal languages for reward function specification in reinforcement learning. In: Joint Conference on Artificial Intelligence, pp. 6065\u20136073 (2019)","DOI":"10.24963\/ijcai.2019\/840"},{"issue":"4","key":"6_CR6","doi-asserted-by":"publisher","first-page":"857","DOI":"10.1145\/210332.210339","volume":"42","author":"C Courcoubetis","year":"1995","unstructured":"Courcoubetis, C., Yannakakis, M.: The complexity of probabilistic verification. J. ACM 42(4), 857\u2013907 (1995)","journal-title":"J. ACM"},{"key":"6_CR7","unstructured":"cpphoafparser (2016). \nhttps:\/\/automata.tools\/hoa\/cpphoafparser\n\n. Accessed 05 Sept 2018"},{"key":"6_CR8","unstructured":"De Giacomo, G., Vardi, M.Y.: Linear temporal logic and linear dynamic logic on finite traces. In: IJCAI, pp. 854\u2013860 (2013)"},{"issue":"5","key":"6_CR9","doi-asserted-by":"publisher","first-page":"1159","DOI":"10.1137\/S0097539703420675","volume":"34","author":"K Etessami","year":"2005","unstructured":"Etessami, K., Wilke, T., Schuller, R.A.: Fair simulation relations, parity games, and state space reduction for B\u00fcchi automata. SIAM J. Comput. 34(5), 1159\u20131175 (2005)","journal-title":"SIAM J. Comput."},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Fu, J., Topcu, U.: Probably approximately correct MDP learning and control with temporal logic constraints. In: Robotics Science and Systems (2014)","DOI":"10.15607\/RSS.2014.X.039"},{"key":"6_CR11","unstructured":"Hahn, E.M., Li, G., Schewe, S., Turrini, A., Zhang, L.: Lazy probabilistic model checking without determinisation. In: Concurrency Theory, pp. 
354\u2013367 (2015)"},{"key":"6_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/978-3-030-17462-0_27","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"EM Hahn","year":"2019","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Omega-regular objectives in model-free reinforcement learning. In: Vojnar, T., Zhang, L. (eds.) TACAS 2019. LNCS, vol. 11427, pp. 395\u2013412. Springer, Cham (2019). \nhttps:\/\/doi.org\/10.1007\/978-3-030-17462-0_27"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Good-for-MDPs automata for probabilistic analysis and reinforcement learning. In: Tools and Algorithms for the Construction and Analysis of Systems, pp. 306\u2013323 (2020)","DOI":"10.1007\/978-3-030-45190-5_17"},{"key":"6_CR14","doi-asserted-by":"crossref","unstructured":"Hasanbeig, M., Kantaros, Y., Abate, A., Kroening, D., Pappas, G.J., Lee, I.: Reinforcement learning for temporal logic control synthesis with probabilistic satisfaction guarantees. In: Conference on Decision and Control, December 2019","DOI":"10.1109\/CDC40024.2019.9028919"},{"key":"6_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/3-540-63141-0_19","volume-title":"CONCUR1997: Concurrency Theory","author":"TA Henzinger","year":"1997","unstructured":"Henzinger, T.A., Kupferman, O., Rajamani, S.K.: Fair simulation. In: Mazurkiewicz, A., Winkowski, J. (eds.) CONCUR 1997. LNCS, vol. 1243, pp. 273\u2013287. Springer, Heidelberg (1997). 
\nhttps:\/\/doi.org\/10.1007\/3-540-63141-0_19"},{"key":"6_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/11874683_26","volume-title":"Computer Science Logic","author":"TA Henzinger","year":"2006","unstructured":"Henzinger, T.A., Piterman, N.: Solving games without determinization. In: \u00c9sik, Z. (ed.) CSL 2006. LNCS, vol. 4207, pp. 395\u2013410. Springer, Heidelberg (2006). \nhttps:\/\/doi.org\/10.1007\/11874683_26"},{"key":"6_CR17","doi-asserted-by":"publisher","unstructured":"Hordijk, A., Yushkevich, A.A.: Handbook of Markov Decision Processes Methods and Applications, pp. 231\u2013267. Springer, New York (2002). \nhttps:\/\/doi.org\/10.1007\/978-1-4615-0805-2","DOI":"10.1007\/978-1-4615-0805-2"},{"key":"6_CR18","unstructured":"Icarte, T.R., Klassen, T.Q., Valenzano, R.A., McIlraith, S.A.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: Conference on Machine Learning, pp. 2112\u20132121, July 2018"},{"key":"6_CR19","unstructured":"Irpan, A.: Deep reinforcement learning doesn\u2019t work yet. \nhttps:\/\/www.alexirpan.com\/2018\/02\/14\/rl-hard.html\n\n (2018)"},{"key":"6_CR20","unstructured":"K\u0159et\u00ednsk\u00fd, J., P\u00e9rez, G.A., Raskin, J.-F.: Learning-based mean-payoff optimization in an unknown MDP under omega-regular constraints. In: CONCUR, vol. 118, LIPIcs, pp. 8:1\u20138:18 (2018)"},{"key":"6_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1007\/978-3-642-22110-1_47","volume-title":"Computer Aided Verification","author":"M Kwiatkowska","year":"2011","unstructured":"Kwiatkowska, M., Norman, G., Parker, D.: PRISM 4.0: verification of probabilistic real-time systems. In: Gopalakrishnan, G., Qadeer, S. (eds.) CAV 2011. LNCS, vol. 6806, pp. 585\u2013591. Springer, Heidelberg (2011). 
\nhttps:\/\/doi.org\/10.1007\/978-3-642-22110-1_47"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"Lavaei, A., Somenzi, F., Soudjani, S., Trivedi, A., Zamani, M.: Formal controller synthesis for unknown continuous-space MDPs via model-free reinforcement learning. In: International Conference on Cyber-Physical Systems, April 2020","DOI":"10.1109\/ICCPS48487.2020.00017"},{"key":"6_CR23","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/978-1-4615-0805-2_3","volume-title":"Handbook of Markov Decision Processes","author":"ME Lewis","year":"2002","unstructured":"Lewis, M.E.: Bias optimality. In: Feinberg, E.A., Shwartz, A. (eds.) Handbook of Markov Decision Processes, pp. 89\u2013111. Springer, Boston (2002). \nhttps:\/\/doi.org\/10.1007\/978-1-4615-0805-2_3"},{"issue":"4","key":"6_CR24","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1137\/1011093","volume":"11","author":"TM Liggett","year":"1969","unstructured":"Liggett, T.M., Lippman, S.A.: Short notes: stochastic games with perfect information and time average payoff. SIAM Rev. 11(4), 604\u2013607 (1969)","journal-title":"SIAM Rev."},{"key":"6_CR25","doi-asserted-by":"crossref","unstructured":"Manna, Z., Pnueli, A.: The Temporal Logic of Reactive and Concurrent Systems: Specification. Springer (1991)","DOI":"10.1007\/978-1-4612-0931-7"},{"key":"6_CR26","unstructured":"Milner, R.: An algebraic definition of simulation between programs. Int. Joint Conf. Artif. Intell. 23, 481\u2013489 (1971)"},{"key":"6_CR27","unstructured":"Ng, A.Y., Harada, D., Russell, S.J.: Policy invariance under reward transformations: theory and application to reward shaping. In: International Conference on Machine Learning, pp. 278\u2013287 (1999)"},{"key":"6_CR28","unstructured":"Perrin, D., Pin, J.-\u00c9.: Infinite Words: Automata, Semigroups, 
Logic and Games. Elsevier (2004)"},{"key":"6_CR29","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, New York (1994)"},{"key":"6_CR30","doi-asserted-by":"crossref","unstructured":"Sadigh, D., Kim, E., Coogan, S., Sastry, S.S., Seshia, S.A.: A learning based approach to control synthesis of Markov decision processes for linear temporal logic specifications. In: CDC, pp. 1091\u20131096, December 2014","DOI":"10.21236\/ADA623517"},{"key":"6_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"312","DOI":"10.1007\/978-3-319-41540-6_17","volume-title":"Computer Aided Verification","author":"S Sickert","year":"2016","unstructured":"Sickert, S., Esparza, J., Jaax, S., K\u0159et\u00ednsk\u00fd, J.: Limit-deterministic B\u00fcchi automata for linear temporal logic. In: Chaudhuri, S., Farzan, A. (eds.) CAV 2016. LNCS, vol. 9780, pp. 312\u2013332. Springer, Cham (2016). \nhttps:\/\/doi.org\/10.1007\/978-3-319-41540-6_17"},{"key":"6_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1007\/978-3-030-28423-7_2","volume-title":"Numerical Software Verification","author":"F Somenzi","year":"2019","unstructured":"Somenzi, F., Trivedi, A.: Reinforcement learning and formal requirements. In: Zamani, M., Zufferey, D. (eds.) NSV 2019. LNCS, vol. 11652, pp. 26\u201341. Springer, Cham (2019). \nhttps:\/\/doi.org\/10.1007\/978-3-030-28423-7_2"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Strehl, A.L., Li, L., Wiewiora, E., Langford, J., Littman, M.L.: PAC model-free reinforcement learning. In: International Conference on Machine Learning, ICML, pp. 
881\u2013888 (2006)","DOI":"10.1145\/1143844.1143955"},{"key":"6_CR34","unstructured":"RMACC Summit Supercomputer. \nhttps:\/\/rmacc.org\/rmaccsummit"},{"key":"6_CR35","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, 2nd edn (2018)"},{"key":"6_CR36","unstructured":"van Hasselt, H.: Double $$Q$$-learning. In: Advances in Neural Information Processing Systems, pp. 2613\u20132621 (2010)"},{"key":"6_CR37","doi-asserted-by":"crossref","unstructured":"Vardi, M.Y.: Automatic verification of probabilistic concurrent finite state programs. In: Foundations of Computer Science, pp. 327\u2013338 (1985)","DOI":"10.1109\/SFCS.1985.12"},{"issue":"3\u20134","key":"6_CR38","first-page":"279","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."}],"container-title":["Lecture Notes in Computer Science","Automated Technology for Verification and Analysis"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-59152-6_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,11]],"date-time":"2020-10-11T23:05:05Z","timestamp":1602457505000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-59152-6_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030591519","9783030591526"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-59152-6_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"12 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter 
History"}},{"value":"ATVA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Automated Technology for Verification and Analysis","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"atva2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/fit.uet.vnu.edu.vn\/atva2020\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review 
Information (provided by the conference organizers)"}},{"value":"75","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}