{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T04:36:37Z","timestamp":1743136597528,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031199912"},{"type":"electronic","value":"9783031199929"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19992-9_3","type":"book-chapter","created":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T09:12:06Z","timestamp":1666429926000},"page":"42-57","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["An Impossibility Result in\u00a0Automata-Theoretic Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9348-7684","authenticated-orcid":false,"given":"Ernst Moritz","family":"Hahn","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4220-3212","authenticated-orcid":false,"given":"Mateo","family":"Perez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9093-9518","authenticated-orcid":false,"given":"Sven","family":"Schewe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2085-2003","authenticated-orcid":false,"given":"Fabio","family":"Somenzi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9346-0126","authenticated-orcid":false,"given":"Ashutosh","family":"Trivedi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5560-0546","authenticated-orcid":false,"given":"Dominik","family":"Wojtczak","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,21]]},"reference":[{"key":"3_CR1","unstructured":"de Alfaro, L.: Formal verification of probabilistic systems. Ph.D. thesis, Stanford University (1998)"},{"key":"3_CR2","unstructured":"Baier, C., Katoen, J.P.: Principles of Model Checking (Representation and Mind Series). The MIT Press (2008)"},{"key":"3_CR3","unstructured":"Bertsekas, D.: Reinforcement Learning and Optimal Control. Athena Scientific (2019)"},{"issue":"1","key":"3_CR4","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s10703-007-0043-4","volume":"32","author":"P Bouyer","year":"2008","unstructured":"Bouyer, P., Brinksma, E., Larsen, K.G.: Optimal infinite scheduling for multi-priced timed automata. Formal Methods Syst. Des. 32(1), 3\u201323 (2008)","journal-title":"Formal Methods Syst. Des."},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Bozkurt, A.K., Wang, Y., Zavlanos, M.M., Pajic, M.: Control synthesis from linear temporal logic specifications using model-free reinforcement learning. In: International Conference on Robotics and Automation (ICRA), pp. 10349\u201310355 (2020)","DOI":"10.1109\/ICRA40945.2020.9196796"},{"key":"3_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1007\/3-540-61042-1_46","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"N Buhrke","year":"1996","unstructured":"Buhrke, N., Lescow, H., V\u00f6ge, J.: Strategy construction in infinite games with Streett and Rabin chain winning conditions. In: Margaria, T., Steffen, B. (eds.) TACAS 1996. LNCS, vol. 1055, pp. 207\u2013224. Springer, Heidelberg (1996). https:\/\/doi.org\/10.1007\/3-540-61042-1_46"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Camacho, A., Toro Icarte, R., Klassen, T.Q., Valenzano, R.A., McIlraith, S.A.: LTL and beyond: formal languages for reward function specification in reinforcement learning. In: IJCAI, vol. 19, pp. 6065\u20136073 (2019)","DOI":"10.24963\/ijcai.2019\/840"},{"key":"3_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-02930-1_1","volume-title":"Automata, Languages and Programming","author":"K Chatterjee","year":"2009","unstructured":"Chatterjee, K., Doyen, L., Henzinger, T.A.: A survey of stochastic games with limsup and liminf objectives. In: Albers, S., Marchetti-Spaccamela, A., Matias, Y., Nikoletseas, S., Thomas, W. (eds.) ICALP 2009. LNCS, vol. 5556, pp. 1\u201315. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-02930-1_1"},{"issue":"4","key":"3_CR9","doi-asserted-by":"publisher","first-page":"857","DOI":"10.1145\/210332.210339","volume":"42","author":"C Courcoubetis","year":"1995","unstructured":"Courcoubetis, C., Yannakakis, M.: The complexity of probabilistic verification. J. ACM 42(4), 857\u2013907 (1995)","journal-title":"J. ACM"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Gaon, M., Brafman, R.: Reinforcement learning with non-Markovian rewards. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34\u201304, pp. 3980\u20133987 (2020)","DOI":"10.1609\/aaai.v34i04.5814"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Gimbert, H., Zielonka, W.: Limits of multi-discounted Markov decision processes. In: Symposium on Logic in Computer Science (LICS 2007), pp. 89\u201398 (2007)","DOI":"10.1109\/LICS.2007.28"},{"key":"3_CR12","unstructured":"Goodfellow, I., Bengio, Y., Courville, A., Bengio, Y.: Deep Learning, vol. 1. MIT Press (2016)"},{"key":"3_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"395","DOI":"10.1007\/978-3-030-17462-0_27","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"EM Hahn","year":"2019","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Omega-regular objectives in model-free reinforcement learning. In: Vojnar, T., Zhang, L. (eds.) TACAS 2019. LNCS, vol. 11427, pp. 395\u2013412. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-17462-0_27"},{"key":"3_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1007\/978-3-030-59152-6_6","volume-title":"Automated Technology for Verification and Analysis","author":"EM Hahn","year":"2020","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Faithful and effective reward schemes for model-free reinforcement learning of omega-regular objectives. In: Hung, D.V., Sokolsky, O. (eds.) ATVA 2020. LNCS, vol. 12302, pp. 108\u2013124. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59152-6_6"},{"key":"3_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1007\/978-3-030-45190-5_17","volume-title":"Tools and Algorithms for the Construction and Analysis of Systems","author":"EM Hahn","year":"2020","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Good-for-MDPs automata for probabilistic analysis and reinforcement learning. In: TACAS 2020. LNCS, vol. 12078, pp. 306\u2013323. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-45190-5_17"},{"key":"3_CR16","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Model-free reinforcement learning for stochastic parity games. In: CONCUR: International Conference on Concurrency Theory. LIPIcs, vol. 171, pp. 21:1\u201321:16 (2020)"},{"key":"3_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1007\/978-3-642-22110-1_47","volume-title":"Computer Aided Verification","author":"M Kwiatkowska","year":"2011","unstructured":"Kwiatkowska, M., Norman, G., Parker, D.: PRISM 4.0: verification of probabilistic real-time systems. In: Gopalakrishnan, G., Qadeer, S. (eds.) CAV 2011. LNCS, vol. 6806, pp. 585\u2013591. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-22110-1_47"},{"issue":"4","key":"3_CR18","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1007\/BF01691063","volume":"3","author":"LH Landweber","year":"1969","unstructured":"Landweber, L.H.: Decision problems for $$\\omega $$-automata. Math. Syst. Theory 3(4), 376\u2013384 (1969)","journal-title":"Math. Syst. Theory"},{"key":"3_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/3-540-46691-6_8","volume-title":"Foundations of Software Technology and Theoretical Computer Science","author":"C L\u00f6ding","year":"1999","unstructured":"L\u00f6ding, C.: Optimal bounds for transformations of $$\\upomega $$-automata. In: Rangan, C.P., Raman, V., Ramanujam, R. (eds.) FSTTCS 1999. LNCS, vol. 1738, pp. 97\u2013109. Springer, Heidelberg (1999). https:\/\/doi.org\/10.1007\/3-540-46691-6_8"},{"key":"3_CR20","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1016\/0168-0072(93)90036-D","volume":"65","author":"R McNaughton","year":"1993","unstructured":"McNaughton, R.: Infinite games played on finite graphs. Ann. Pure Appl. Logic 65, 149\u2013184 (1993)","journal-title":"Ann. Pure Appl. Logic"},{"key":"3_CR21","unstructured":"Perrin, D., Pin, J.\u00c9.: Infinite Words: Automata, Semigroups, Logic and Games. Elsevier (2004)"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Piterman, N., Pnueli, A.: Faster solutions of Rabin and Streett games. In: Symposium on Logic in Computer Science, pp. 275\u2013284 (2006)","DOI":"10.1109\/LICS.2006.23"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Pnueli, A.: The temporal logic of programs. In: IEEE Symposium on Foundations of Computer Science, pp. 46\u201357 (1977)","DOI":"10.1109\/SFCS.1977.32"},{"key":"3_CR24","doi-asserted-by":"publisher","DOI":"10.1002\/9780470316887","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"1994","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley, New York (1994)"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Sadigh, D., Kim, E., Coogan, S., Sastry, S.S., Seshia, S.A.: A learning based approach to control synthesis of Markov decision processes for linear temporal logic specifications. In: Conference on Decision and Control (CDC), pp. 1091\u20131096 (2014)","DOI":"10.21236\/ADA623517"},{"key":"3_CR26","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction, 2nd edn. MIT Press, Cambridge (2018)","edition":"2"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Thomas, W.: Automata on infinite objects. In: Handbook of Theoretical Computer Science, pp. 133\u2013191. The MIT Press\/Elsevier (1990)","DOI":"10.1016\/B978-0-444-88074-1.50009-3"},{"key":"3_CR28","unstructured":"Toro Icarte, R., Klassen, T., Valenzano, R., McIlraith, S.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: International Conference on Machine Learning, pp. 2107\u20132116 (2018)"},{"key":"3_CR29","unstructured":"Toro Icarte, R., Waldie, E., Klassen, T., Valenzano, R., Castro, M., McIlraith, S.: Learning reward machines for partially observable reinforcement learning. In: Advances in Neural Information Processing Systems, vol. 32, pp. 15523\u201315534 (2019)"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Vardi, M.Y.: Automatic verification of probabilistic concurrent finite state programs. In: Foundations of Computer Science, pp. 327\u2013338 (1985)","DOI":"10.1109\/SFCS.1985.12"},{"key":"3_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/978-3-030-84060-0_8","volume-title":"Machine Learning and Knowledge Extraction","author":"Z Xu","year":"2021","unstructured":"Xu, Z., Wu, B., Ojha, A., Neider, D., Topcu, U.: Active finite reward automaton inference and reinforcement learning using queries and counterexamples. In: Holzinger, A., Kieseberg, P., Tjoa, A.M., Weippl, E. (eds.) CD-MAKE 2021. LNCS, vol. 12844, pp. 115\u2013135. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-84060-0_8"},{"issue":"1\u20132","key":"3_CR32","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1016\/S0304-3975(98)00009-7","volume":"200","author":"W Zielonka","year":"1998","unstructured":"Zielonka, W.: Infinite games on finitely coloured graphs with applications to automata on infinite trees. Theor. Comput. Sci. 200(1\u20132), 135\u2013183 (1998)","journal-title":"Theor. Comput. Sci."}],"container-title":["Lecture Notes in Computer Science","Automated Technology for Verification and Analysis"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19992-9_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T10:00:53Z","timestamp":1728208853000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19992-9_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031199912","9783031199929"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19992-9_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"21 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ATVA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Automated Technology for Verification and Analysis","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"atva2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/atva-conference.org\/2022\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"81","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"26% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the COVID-19 pandemic, the conference was held virtually. Additional to the 26 papers, 1 invited talk is included.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}