{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T07:03:53Z","timestamp":1743145433463,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031432637"},{"type":"electronic","value":"9783031432644"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43264-4_6","type":"book-chapter","created":{"date-parts":[[2023,9,6]],"date-time":"2023-09-06T23:03:02Z","timestamp":1694041382000},"page":"83-99","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Exploiting Reward Machines with\u00a0Deep Reinforcement Learning in\u00a0Continuous Action Domains"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-7185-0921","authenticated-orcid":false,"given":"Haolin","family":"Sun","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1625-0226","authenticated-orcid":false,"given":"Yves","family":"Lesp\u00e9rance","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,7]]},"reference":[{"key":"6_CR1","unstructured":"Brafman, R.I., Giacomo, G.D., Patrizi, F.: LTLf\/LDLf non-markovian rewards. In: McIlraith, S.A., Weinberger, K.Q. (eds.) Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence, (AAAI-18), the 30th Innovative Applications of Artificial Intelligence (IAAI-18), and the 8th AAAI Symposium on Educational Advances in Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February 2\u20137, 2018, pp. 1771\u20131778. AAAI Press (2018). https:\/\/www.aaai.org\/ocs\/index.php\/AAAI\/AAAI18\/paper\/view\/17342"},{"key":"6_CR2","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., Zaremba, W.: OpenAI Gym. CoRR abs\/1606.01540 (2016), http:\/\/arxiv.org\/abs\/1606.01540"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"De Giacomo, G., Iocchi, L., Favorito, M., Patrizi, F.: Restraining bolts for reinforcement learning agents. In: The Thirty-Fourth AAAI Conference on Artificial Intelligence, AAAI 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, IAAI 2020, The Tenth AAAI Symposium on Educational Advances in Artificial Intelligence, EAAI 2020, New York, NY, USA, February 7\u201312, 2020, pp. 13659\u201313662. AAAI Press (2020). https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/7114","DOI":"10.1609\/aaai.v34i09.7114"},{"key":"6_CR4","unstructured":"De Giacomo, G., Vardi, M.Y.: Linear temporal logic and linear dynamic logic on finite traces. In: Rossi, F. (ed.) IJCAI 2013, Proceedings of the 23rd International Joint Conference on Artificial Intelligence, Beijing, China, August 3\u20139, 2013. pp. 854\u2013860. IJCAI\/AAAI (2013). 
http:\/\/www.aaai.org\/ocs\/index.php\/IJCAI\/IJCAI13\/paper\/view\/6997"},{"key":"6_CR5","doi-asserted-by":"publisher","unstructured":"Feinberg, A.: Markov decision processes: discrete stochastic dynamic programming (Martin L. Puterman). SIAM Rev. 38(4), 689 (1996). https:\/\/doi.org\/10.1137\/1038137","DOI":"10.1137\/1038137"},{"key":"6_CR6","unstructured":"Fujimoto, S., van Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: Dy, J.G., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10\u201315, 2018. Proceedings of Machine Learning Research, vol. 80, pp. 1582\u20131591. PMLR (2018). http:\/\/proceedings.mlr.press\/v80\/fujimoto18a.html"},{"key":"6_CR7","doi-asserted-by":"publisher","first-page":"53601","DOI":"10.1109\/ACCESS.2022.3175493","volume":"10","author":"A Guillen-Perez","year":"2022","unstructured":"Guillen-Perez, A., Cano, M.: Learning from oracle demonstrations - a new approach to develop autonomous intersection management control algorithms based on multiagent deep reinforcement learning. IEEE Access 10, 53601\u201353613 (2022). https:\/\/doi.org\/10.1109\/ACCESS.2022.3175493","journal-title":"IEEE Access"},{"key":"6_CR8","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Dy, J.G., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10\u201315, 2018. Proceedings of Machine Learning Research, vol. 80, pp. 1856\u20131865. PMLR (2018). http:\/\/proceedings.mlr.press\/v80\/haarnoja18b.html"},{"key":"6_CR9","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1007\/978-3-031-16336-4_11","volume-title":"Quantitative Evaluation of Systems","author":"M Hasanbeig","year":"2022","unstructured":"Hasanbeig, M., Kroening, D., Abate, A.: LCRL: certified policy synthesis via logically-constrained reinforcement learning. In: \u00c1brah\u00e1m, E., Paolieri, M. (eds.) QEST 2022. LNCS, vol. 13479, pp. 217\u2013231. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-16336-4_11"},{"key":"6_CR10","unstructured":"van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: Schuurmans, D., Wellman, M.P. (eds.) Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence, February 12\u201317, 2016, Phoenix, Arizona, USA, pp. 2094\u20132100. AAAI Press (2016). http:\/\/www.aaai.org\/ocs\/index.php\/AAAI\/AAAI16\/paper\/view\/12389"},{"key":"6_CR11","unstructured":"Lacerda, B., Parker, D., Hawes, N.: Optimal policy generation for partially satisfiable co-safe LTL specifications. In: Yang, Q., Wooldridge, M.J. (eds.) Proceedings of the Twenty-Fourth International Joint Conference on Artificial Intelligence, IJCAI 2015, Buenos Aires, Argentina, July 25\u201331, 2015, pp. 1587\u20131593. AAAI Press (2015). http:\/\/ijcai.org\/Abstract\/15\/227"},{"key":"6_CR12","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. In: Bengio, Y., LeCun, Y. (eds.) 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico, May 2\u20134, 2016, Conference Track Proceedings (2016). 
http:\/\/arxiv.org\/abs\/1509.02971"},{"issue":"7540","key":"6_CR13","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015). https:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"6_CR14","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: theory and application to reward shaping. In: Bratko, I., Dzeroski, S. (eds.) Proceedings of the Sixteenth International Conference on Machine Learning (ICML 1999), Bled, Slovenia, June 27\u201330, 1999, pp. 278\u2013287. Morgan Kaufmann (1999)"},{"key":"6_CR15","doi-asserted-by":"publisher","unstructured":"Pnueli, A.: The temporal logic of programs. In: 18th Annual Symposium on Foundations of Computer Science, Providence, Rhode Island, USA, 31 October - 1 November 1977, pp. 46\u201357. IEEE Computer Society (1977). https:\/\/doi.org\/10.1109\/SFCS.1977.32","DOI":"10.1109\/SFCS.1977.32"},{"key":"6_CR16","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M.I., Moritz, P.: Trust region policy optimization. In: Bach, F.R., Blei, D.M. (eds.) Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, Lille, France, 6\u201311 July 2015. JMLR Workshop and Conference Proceedings, vol. 37, pp. 1889\u20131897. JMLR.org (2015). http:\/\/proceedings.mlr.press\/v37\/schulman15.html"},{"key":"6_CR17","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. CoRR abs\/1707.06347 (2017). http:\/\/arxiv.org\/abs\/1707.06347"},{"key":"6_CR18","doi-asserted-by":"crossref","unstructured":"Sun, H.: Exploiting Reward Machines with Deep Reinforcement Learning in Continuous Action Domains. Master\u2019s thesis, EECS Dept., York University, Toronto, Canada (2022)","DOI":"10.1007\/978-3-031-43264-4_6"},{"issue":"1\u20132","key":"6_CR19","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999). https:\/\/doi.org\/10.1016\/S0004-3702(99)00052-1","journal-title":"Artif. Intell."},{"key":"6_CR20","unstructured":"Toro Icarte, R.: Reward Machines. Ph.D. thesis, University of Toronto, Canada (2022). http:\/\/hdl.handle.net\/1807\/110754"},{"key":"6_CR21","unstructured":"Toro Icarte, R., Klassen, T.Q., Valenzano, R.A., McIlraith, S.A.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: Dy, J.G., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10\u201315, 2018. Proceedings of Machine Learning Research, vol. 80, pp. 2112\u20132121. PMLR (2018). http:\/\/proceedings.mlr.press\/v80\/icarte18a.html"},{"key":"6_CR22","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1613\/jair.1.12440","volume":"73","author":"R Toro Icarte","year":"2022","unstructured":"Toro Icarte, R., Klassen, T.Q., Valenzano, R.A., McIlraith, S.A.: Reward machines: exploiting reward function structure in reinforcement learning. J. Artif. Intell. Res. 73, 173\u2013208 (2022). https:\/\/doi.org\/10.1613\/jair.1.12440","journal-title":"J. 
Artif. Intell. Res."},{"key":"6_CR23","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins, C.J.C.H., Dayan, P.: Q-learning. Mach. Learn. 8, 279\u2013292 (1992). https:\/\/doi.org\/10.1007\/BF00992698","journal-title":"Mach. Learn."}],"container-title":["Lecture Notes in Computer Science","Multi-Agent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43264-4_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T11:41:43Z","timestamp":1710330103000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43264-4_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031432637","9783031432644"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43264-4_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"7 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EUMAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Multi-Agent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Naples","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eumas2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/vadimmalvone.github.io\/eumas2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"47","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for 
Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"51% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"PhD - Short Papers (20 submissions; 16 short papers accepted for proceedings)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}