{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T17:36:45Z","timestamp":1743010605637,"version":"3.40.3"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031562549"},{"type":"electronic","value":"9783031562556"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-56255-6_3","type":"book-chapter","created":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T11:01:47Z","timestamp":1711710107000},"page":"43-59","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning Reward Machines in\u00a0Cooperative Multi-agent Tasks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4400-7127","authenticated-orcid":false,"given":"Leo","family":"Ardon","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7461-1910","authenticated-orcid":false,"given":"Daniel","family":"Furelos-Blanco","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3318-8711","authenticated-orcid":false,"given":"Alessandra","family":"Russo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,30]]},"reference":[{"key":"3_CR1","volume-title":"Multi-Agent Reinforcement Learning: Foundations and Modern Approaches","author":"SV Albrecht","year":"2023","unstructured":"Albrecht, S.V., Christianos, F., Sch\u00e4fer, L.: Multi-Agent Reinforcement Learning: Foundations and Modern Approaches. MIT Press, Cambridge (2023)"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Ardon, L., Vadori, N., Spooner, T., Xu, M., Vann, J., Ganesh, S.: Towards a fully RL-based market simulator. In: Proceedings of the ACM International Conference on AI in Finance (ICAIF), pp. 7:1\u20137:9 (2021)","DOI":"10.1145\/3490354.3494372"},{"issue":"2","key":"3_CR3","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., De Schutter, B.: A comprehensive survey of multiagent reinforcement learning. IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.) 38(2), 156\u2013172 (2008)","journal-title":"IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.)"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Camacho, A., Toro Icarte, R., Klassen, T.Q., Valenzano, R.A., McIlraith, S.A.: LTL and beyond: formal languages for reward function specification in reinforcement learning. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 6065\u20136073 (2019)","DOI":"10.24963\/ijcai.2019\/840"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Camacho, A., Varley, J., Zeng, A., Jain, D., Iscen, A., Kalashnikov, D.: Reward machines for vision-based robotic manipulation. In: Proceedings of the IEEE International Conference on Robotics and Automation (ICRA), pp. 14284\u201314290 (2021)","DOI":"10.1109\/ICRA48506.2021.9561927"},{"key":"3_CR6","unstructured":"Christoffersen, P.J.K., Li, A.C., Toro Icarte, R., McIlraith, S.A.: Learning symbolic representations for reinforcement learning of non-Markovian behavior. In: Proceedings of the Knowledge Representation and Reasoning Meets Machine Learning (KR2ML) Workshop at the Advances in Neural Information Processing Systems (NeurIPS) Conference (2020)"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Dai, J., Lin, H.: Automatic synthesis of cooperative multi-agent systems. In: Proceedings of the IEEE Conference on Decision and Control (CDC), pp. 6173\u20136178 (2014)","DOI":"10.1109\/CDC.2014.7040356"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Dann, M., Yao, Y., Alechina, N., Logan, B., Thangarajah, J.: Multi-agent intention progression with reward machines. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 215\u2013222 (2022)","DOI":"10.24963\/ijcai.2022\/31"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"De Giacomo, G., Favorito, M., Iocchi, L., Patrizi, F., Ronca, A.: Temporal logic monitoring rewards via transducers. In: Proceedings of the International Conference on Principles of Knowledge Representation and Reasoning (KR), pp. 860\u2013870 (2020)","DOI":"10.24963\/kr.2020\/89"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Eappen, J., Jagannathan, S.: DistSPECTRL: distributing specifications in multi-agent reinforcement learning systems. arXiv preprint arXiv:2206.13754 (2022)","DOI":"10.1007\/978-3-031-26412-2_15"},{"issue":"1","key":"3_CR11","first-page":"22","volume":"7","author":"AE Elsefy","year":"2020","unstructured":"Elsefy, A.E.: A task decomposition using (HDec-POSMDPs) approach for multi-robot exploration and fire searching. Int. J. Robot. Mechatron. 7(1), 22\u201330 (2020)","journal-title":"Int. J. Robot. Mechatron."},{"issue":"3","key":"3_CR12","doi-asserted-by":"publisher","first-page":"4257","DOI":"10.1109\/LRA.2021.3064284","volume":"6","author":"F Fuchs","year":"2021","unstructured":"Fuchs, F., Song, Y., Kaufmann, E., Scaramuzza, D., Durr, P.: Super-human performance in gran turismo sport using deep reinforcement learning. IEEE Robot. Autom. Lett. 6(3), 4257\u20134264 (2021)","journal-title":"IEEE Robot. Autom. Lett."},{"key":"3_CR13","doi-asserted-by":"publisher","first-page":"1031","DOI":"10.1613\/jair.1.12372","volume":"70","author":"D Furelos-Blanco","year":"2021","unstructured":"Furelos-Blanco, D., Law, M., Jonsson, A., Broda, K., Russo, A.: Induction and exploitation of subgoal automata for reinforcement learning. J. Artif. Intell. Res. 70, 1031\u20131116 (2021)","journal-title":"J. Artif. Intell. Res."},{"key":"3_CR14","unstructured":"Furelos-Blanco, D., Law, M., Jonsson, A., Broda, K., Russo, A.: Hierarchies of reward machines. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 10494\u201310541 (2023)"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Gaon, M., Brafman, R.I.: Reinforcement learning with non-Markovian rewards. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), pp. 3980\u20133987 (2020)","DOI":"10.1609\/aaai.v34i04.5814"},{"issue":"3","key":"3_CR16","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1016\/S0019-9958(78)90562-4","volume":"37","author":"EM Gold","year":"1978","unstructured":"Gold, E.M.: Complexity of automaton identification from given data. Inf. Control 37(3), 302\u2013320 (1978)","journal-title":"Inf. Control"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Hasanbeig, M., Jeppu, N.Y., Abate, A., Melham, T., Kroening, D.: DeepSynth: automata synthesis for automatic task segmentation in deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), pp. 7647\u20137656 (2021)","DOI":"10.1609\/aaai.v35i9.16935"},{"key":"3_CR18","unstructured":"Kaelbling, L.P.: Learning to achieve goals. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), pp. 1094\u20131099 (1993)"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Law, M., Russo, A., Broda, K.: The ILASP System for Learning Answer Set Programs (2015). www.ilasp.com","DOI":"10.1007\/978-3-319-11558-0_22"},{"issue":"7540","key":"3_CR20","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"3_CR21","unstructured":"Neary, C., Xu, Z., Wu, B., Topcu, U.: Reward machines for cooperative multi-agent reinforcement learning. In: Proceedings of the International Conference on Autonomous Agents and Multiagent Systems (AAMAS), pp. 934\u2013942 (2021)"},{"issue":"1","key":"3_CR22","first-page":"7234","volume":"21","author":"T Rashid","year":"2020","unstructured":"Rashid, T., Samvelyan, M., De Witt, C.S., Farquhar, G., Foerster, J., Whiteson, S.: Monotonic value function factorisation for deep multi-agent reinforcement learning. J. Mach. Learn. Res. 21(1), 7234\u20137284 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"3_CR23","unstructured":"Shalev-Shwartz, S., Shammah, S., Shashua, A.: Safe, multi-agent, reinforcement learning for autonomous driving. arXiv preprint arXiv:1610.03295 (2016)"},{"issue":"7587","key":"3_CR24","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"3_CR25","unstructured":"Sultana, N.N., Meisheri, H., Baniwal, V., Nath, S., Ravindran, B., Khadilkar, H.: Reinforcement learning for multi-product multi-node inventory management in supply chains. arXiv preprint arXiv:2006.04037 (2020)"},{"key":"3_CR26","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"3_CR27","unstructured":"Toro Icarte, R., Klassen, T., Valenzano, R., McIlraith, S.: Using reward machines for high-level task specification and decomposition in reinforcement learning. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 2107\u20132116 (2018)"},{"key":"3_CR28","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1613\/jair.1.12440","volume":"73","author":"R Toro Icarte","year":"2022","unstructured":"Toro Icarte, R., Klassen, T.Q., Valenzano, R., McIlraith, S.A.: Reward machines: exploiting reward function structure in reinforcement learning. J. Artif. Intell. Res. 73, 173\u2013208 (2022)","journal-title":"J. Artif. Intell. Res."},{"key":"3_CR29","unstructured":"Toro Icarte, R., Waldie, E., Klassen, T.Q., Valenzano, R.A., Castro, M.P., McIlraith, S.A.: Learning reward machines for partially observable reinforcement learning. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) Conference, pp. 15497\u201315508 (2019)"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Xu, Z., et al.: Joint inference of reward machines and policies for reinforcement learning. In: Proceedings of the International Conference on Automated Planning and Scheduling (ICAPS), pp. 590\u2013598 (2020)","DOI":"10.1609\/icaps.v30i1.6756"}],"container-title":["Lecture Notes in Computer Science","Autonomous Agents and Multiagent Systems. Best and Visionary Papers"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-56255-6_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,29]],"date-time":"2024-03-29T11:02:21Z","timestamp":1711710141000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-56255-6_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031562549","9783031562556"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-56255-6_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"30 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AAMAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Autonomous Agents and Multiagent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"London","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 May 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 May 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"atal2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aamas2023.soton.ac.uk\/program\/accepted-workshops\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}