{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:39:24Z","timestamp":1742913564981,"version":"3.40.3"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031352560"},{"type":"electronic","value":"9783031352577"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-35257-7_4","type":"book-chapter","created":{"date-parts":[[2023,6,26]],"date-time":"2023-06-26T23:02:32Z","timestamp":1687820552000},"page":"59-76","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Decomposing Synthesized Strategies for\u00a0Reactive Multi-agent Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Chenyang","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Jinyu","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Yujie","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Fang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,6,27]]},"reference":[{"key":"4_CR1","unstructured":"Brockman, G., et al.: Openai gym. arXiv preprint arXiv:1606.01540 (2016)"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Camacho, A., Toro Icarte, R., Klassen, T.Q., Valenzano, R., McIlraith, S.A.: Ltl and beyond: Formal languages for reward function specification in reinforcement learning. In: Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence, (IJCAI), pp. 6065\u20136073 (7 2019)","DOI":"10.24963\/ijcai.2019\/840"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Cassandras, C.G., Lafortune, S.: Introduction to discrete event systems. Springer (2008)","DOI":"10.1007\/978-0-387-68612-7"},{"issue":"5","key":"4_CR4","doi-asserted-by":"publisher","first-page":"1244","DOI":"10.1109\/TAC.2014.2298143","volume":"59","author":"X Ding","year":"2014","unstructured":"Ding, X., Smith, S.L., Belta, C., Rus, D.: Optimal control of markov decision processes with linear temporal logic constraints. IEEE Trans. Autom. Control 59(5), 1244\u20131257 (2014)","journal-title":"IEEE Trans. Autom. Control"},{"key":"4_CR5","unstructured":"Even-Dar, E., Mansour, Y.: Convergence of optimistic and incremental q-learning. In: Advances in Neural Information Processing Systems, vol. 14 (2001)"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Gao, Q., Hajinezhad, D., Zhang, Y., Kantaros, Y., Zavlanos, M.M.: Reduced variance deep reinforcement learning with temporal logic specifications. In: Proceedings of the 10th ACM\/IEEE International Conference on Cyber-Physical Systems(ICCPS), pp. 237\u2013248 (2019)","DOI":"10.1145\/3302509.3311053"},{"key":"4_CR7","doi-asserted-by":"publisher","unstructured":"Gronauer, S., Diepold, K.: Multi-agent deep reinforcement learning: a survey. Artif. Intell. Rev. 5, 1\u201349 (2021). https:\/\/doi.org\/10.1007\/s10462-021-09996-w","DOI":"10.1007\/s10462-021-09996-w"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Hahn, E.M., Perez, M., Schewe, S., Somenzi, F., Trivedi, A., Wojtczak, D.: Omega-regular objectives in model-free reinforcement learning. In: International Conference on Tools and Algorithms for the Construction and Analysis of Systems(TACAS), pp. 395\u2013412. Springer (2019)","DOI":"10.1007\/978-3-030-17462-0_27"},{"key":"4_CR9","unstructured":"Hammond, L., Abate, A., Gutierrez, J., Wooldridge, M.: Multi-agent reinforcement learning with temporal logic specifications. In: Adaptive Agents and Multi-Agent Systems (2021)"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Hasanbeig, M., Kantaros, Y., Abate, A., Kroening, D., Pappas, G.J., Lee, I.: Reinforcement learning for temporal logic control synthesis with probabilistic satisfaction guarantees. In: 2019 IEEE 58th Conference on Decision and Control (CDC), pp. 5338\u20135343. IEEE (2019)","DOI":"10.1109\/CDC40024.2019.9028919"},{"key":"4_CR11","first-page":"15931","volume":"33","author":"Y Hu","year":"2020","unstructured":"Hu, Y., et al.: Learning to utilize shaping rewards: a new approach of reward shaping. Adv. Neural. Inf. Process. Syst. 33, 15931\u201315941 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4_CR12","unstructured":"Icarte, R.T., Klassen, T., Valenzano, R., McIlraith, S.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: International Conference on Machine Learning(ICML), pp. 2107\u20132116. PMLR (2018)"},{"key":"4_CR13","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1613\/jair.1.12440","volume":"73","author":"RT Icarte","year":"2022","unstructured":"Icarte, R.T., Klassen, T.Q., Valenzano, R., McIlraith, S.A.: Reward machines: exploiting reward function structure in reinforcement learning. J. Artif. Intell. Res. 73, 173\u2013208 (2022)","journal-title":"J. Artif. Intell. Res."},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Ikeda, T., Shibuya, T.: Centralized training with decentralized execution reinforcement learning for cooperative multi-agent systems with communication delay. In: 2022 61st Annual Conference of the Society of Instrument and Control Engineers (SICE), pp. 135\u2013140. IEEE (2022)","DOI":"10.23919\/SICE56594.2022.9905866"},{"key":"4_CR15","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: a survey. J. Artif. Intell. Res. 4, 237\u2013285 (1996)","journal-title":"J. Artif. Intell. Res."},{"issue":"5","key":"4_CR16","doi-asserted-by":"publisher","first-page":"890","DOI":"10.1016\/j.automatica.2011.01.078","volume":"47","author":"M Karimadini","year":"2011","unstructured":"Karimadini, M., Lin, H.: Guaranteed global performance through local coordinations. Automatica 47(5), 890\u2013898 (2011)","journal-title":"Automatica"},{"issue":"6","key":"4_CR17","doi-asserted-by":"publisher","first-page":"2078","DOI":"10.1002\/asjc.1300","volume":"18","author":"M Karimadini","year":"2016","unstructured":"Karimadini, M., Lin, H., Karimoddini, A.: Cooperative tasking for deterministic specification automata. Asian J. Contr. 18(6), 2078\u20132087 (2016)","journal-title":"Asian J. Contr."},{"key":"4_CR18","unstructured":"Livingston, S.: Gr1c: A collection of tools for gr(1) synthesis and related activities, https:\/\/github.com\/tulip-control\/gr1c"},{"issue":"2","key":"4_CR19","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1006\/inco.1995.1134","volume":"121","author":"N Lynch","year":"1995","unstructured":"Lynch, N., Vaandrager, F.: Forward and backward simulations. Inf. Comput. 121(2), 214\u2013233 (1995)","journal-title":"Inf. Comput."},{"key":"4_CR20","unstructured":"Neary, C., Xu, Z., Wu, B., Topcu, U.: Reward machines for cooperative multi-agent reinforcement learning. In: Proceedings of the 20th International Conference on Autonomous Agents and MultiAgent Systems. pp. 934\u2013942. AAMAS In: 21, International Foundation for Autonomous Agents and Multiagent Systems, Richland, SC (2021)"},{"issue":"3","key":"4_CR21","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1109\/LCSYS.2020.2980552","volume":"4","author":"R Oura","year":"2020","unstructured":"Oura, R., Sakakibara, A., Ushio, T.: Reinforcement learning of control policy for linear temporal logic specifications using limit-deterministic generalized b\u00fcchi automata. IEEE Contr. Syst. Lett. 4(3), 761\u2013766 (2020)","journal-title":"IEEE Contr. Syst. Lett."},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Piterman, N., Pnueli, A., Sa\u2019ar, Y.: Synthesis of reactive (1) designs. In: International Workshop on Verification, Model Checking, and Abstract Interpretation, pp. 364\u2013380. Springer (2006)","DOI":"10.1007\/11609773_24"},{"key":"4_CR23","doi-asserted-by":"publisher","unstructured":"Pnueli, A.: The temporal logic of programs. In: 18th Annual Symposium on Foundations of Computer Science (sfcs 1977), pp. 46\u201357. IEEE, IEEE (Sep 1977). https:\/\/doi.org\/10.1109\/sfcs.1977.32","DOI":"10.1109\/sfcs.1977.32"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Pnueli, A., Rosner, R.: On the synthesis of a reactive module. In: Proceedings of the 16th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, pp. 179\u2013190 (1989)","DOI":"10.1145\/75277.75293"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Tan, M.: Multi-agent reinforcement learning: Independent vs. cooperative agents. In: Proceedings of the Tenth International Conference on Machine Learning, pp. 330\u2013337 (1993)","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"4_CR26","unstructured":"Tang, H., et al.: Hierarchical deep multiagent reinforcement learning with temporal abstraction. arXiv preprint arXiv:1809.09332 (2018)"},{"key":"4_CR27","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.comcom.2022.09.018","volume":"196","author":"N Waqar","year":"2022","unstructured":"Waqar, N., Hassan, S.A., Pervaiz, H., Jung, H., Dev, K.: Deep multi-agent reinforcement learning for resource allocation in noma-enabled mec. Comput. Commun. 196, 1\u20138 (2022)","journal-title":"Comput. Commun."},{"issue":"3","key":"4_CR28","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8(3), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Wolff, E.M., Topcu, U., Murray, R.M.: Robust control of uncertain markov decision processes with temporal logic specifications. In: 2012 IEEE 51st IEEE Conference on Decision and Control (CDC), pp. 3372\u20133379. IEEE (2012)","DOI":"10.1109\/CDC.2012.6426174"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Yang, C., Littman, M.L., Carbin, M.: On the (in) tractability of reinforcement learning for ltl objectives. In: Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, IJCAI, pp. 3650\u20133658 (2022)","DOI":"10.24963\/ijcai.2022\/507"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., Liu, H., Zhang, T., Basar, T.: Fully decentralized multi-agent reinforcement learning with networked agents. In: International Conference on Machine Learning, pp. 5872\u20135881. PMLR (2018)","DOI":"10.1109\/CDC.2018.8619581"},{"key":"4_CR32","doi-asserted-by":"publisher","DOI":"10.1016\/j.scico.2022.102907","volume":"225","author":"C Zhu","year":"2023","unstructured":"Zhu, C., Butler, M., Cirstea, C., Hoang, T.S.: A fairness-based refinement strategy to transform liveness properties in Event-B models. Sci. Comput. Program. 225, 102907 (2023)","journal-title":"Sci. Comput. Program."},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Zhu, C., Cai, Y., Hu, C., Bi, J.: Efficient reinforcement learning with generalized-reactivity specifications. In: 2022 29th Asia-Pacific Software Engineering Conference (APSEC), pp. 31\u201340. IEEE (2022)","DOI":"10.1109\/APSEC57359.2022.00015"},{"issue":"22","key":"4_CR34","doi-asserted-by":"publisher","first-page":"3716","DOI":"10.3390\/electronics11223716","volume":"11","author":"C Zhu","year":"2022","unstructured":"Zhu, C., Cai, Y., Zhu, J., Hu, C., Bi, J.: Gr (1)-guided deep reinforcement learning for multi-task motion planning under a stochastic environment. Electronics 11(22), 3716 (2022)","journal-title":"Electronics"}],"container-title":["Lecture Notes in Computer Science","Theoretical Aspects of Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-35257-7_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,26]],"date-time":"2023-06-26T23:03:16Z","timestamp":1687820596000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-35257-7_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031352560","9783031352577"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-35257-7_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"27 June 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TASE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Theoretical Aspects of Software Engineering","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bristol","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 July 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 July 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tase2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/bristolpl.github.io\/tase2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"49","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"39% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}