{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T14:00:21Z","timestamp":1743084021398,"version":"3.40.3"},"publisher-location":"Cham","reference-count":41,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031101601"},{"type":"electronic","value":"9783031101618"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-10161-8_1","type":"book-chapter","created":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T20:02:54Z","timestamp":1658174574000},"page":"3-21","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Specification Aware Multi-Agent Reinforcement Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7707-1358","authenticated-orcid":false,"given":"Fabian","family":"Ritz","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomy","family":"Phan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3108-713X","authenticated-orcid":false,"given":"Robert","family":"M\u00fcller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas","family":"Gabor","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andreas","family":"Sedlmeier","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marc","family":"Zeller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jan","family":"Wieghardt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Reiner","family":"Schmid","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Horst","family":"Sauer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cornel","family":"Klein","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Claudia","family":"Linnhoff-Popien","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,7,19]]},"reference":[{"key":"1_CR1","unstructured":"Amodei, D., Olah, C., Steinhardt, J., Christiano, P.F., Schulman, J., Man\u00e9, D.: Concrete problems in AI safety. arXiv:1606.06565 (2016)"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Belzner, L., Beck, M.T., Gabor, T., Roelle, H., Sauer, H.: Software engineering for distributed autonomous real-time systems. In: 2016 IEEE\/ACM 2nd International Workshop on Software Engineering for Smart Cyber-Physical Systems (SEsCPS), pp. 54\u201357. IEEE (2016)","DOI":"10.1145\/2897035.2897040"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, ICML 2009, pp. 41\u201348 (2009)","DOI":"10.1145\/1553374.1553380"},{"issue":"2","key":"1_CR4","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1145\/3089649.3089656","volume":"42","author":"T Bures","year":"2017","unstructured":"Bures, T., et al.: Software engineering for smart cyber-physical systems: challenges and promising solutions. ACM SIGSOFT Softw. Eng. Notes 42(2), 19\u201324 (2017)","journal-title":"ACM SIGSOFT Softw. Eng. Notes"},{"key":"1_CR5","unstructured":"Chang, Y.H., Ho, T., Kaelbling, L.P.: All learning is local: multi-agent learning in global reward games. In: Advances in Neural Information Processing Systems, pp. 807\u2013814 (2004)"},{"key":"1_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-02161-9_1","volume-title":"Software Engineering for Self-Adaptive Systems","author":"BHC Cheng","year":"2009","unstructured":"Cheng, B.H.C., et al.: Software engineering for self-adaptive systems: a research roadmap. In: Cheng, B.H.C., de Lemos, R., Giese, H., Inverardi, P., Magee, J. (eds.) Software Engineering for Self-Adaptive Systems. LNCS, vol. 5525, pp. 1\u201326. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-02161-9_1"},{"key":"1_CR7","unstructured":"Devlin, S., Kudenko, D.: Dynamic potential-based reward shaping. In: Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems, AAMAS 2012, vol. 1, pp. 433\u2013440 (2012)"},{"key":"1_CR8","unstructured":"Devlin, S., Yliniemi, L., Kudenko, D., Tumer, K.: Potential-based difference rewards for multiagent reinforcement learning. In: Proceedings of the 2014 International Conference on Autonomous Agents and Multi-agent Systems, AAMAS 2014, pp. 165\u2013172 (2014)"},{"key":"1_CR9","unstructured":"Foerster, J., Assael, I.A., de Freitas, N., Whiteson, S.: Learning to communicate with deep multi-agent reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 2137\u20132145 (2016)"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Foerster, J.N., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. In: Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11794"},{"issue":"42","key":"1_CR11","first-page":"1437","volume":"16","author":"J Garc\u00eda","year":"2015","unstructured":"Garc\u00eda, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16(42), 1437\u20131480 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"1_CR12","unstructured":"Grze\u015b, M.: Reward shaping in episodic reinforcement learning. In: Proceedings of the 16th Conference on Autonomous Agents and MultiAgent Systems, AAMAS 2017, pp. 565\u2013573 (2017)"},{"key":"1_CR13","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1007\/978-3-319-71682-4_5","volume-title":"Autonomous Agents and Multiagent Systems","author":"JK Gupta","year":"2017","unstructured":"Gupta, J.K., Egorov, M., Kochenderfer, M.: Cooperative multi-agent control using deep reinforcement learning. In: Sukthankar, G., Rodriguez-Aguilar, J.A. (eds.) AAMAS 2017. LNCS (LNAI), vol. 10642, pp. 66\u201383. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-71682-4_5"},{"key":"1_CR14","unstructured":"Hendrycks, D., Carlini, N., Schulman, J., Steinhardt, J.: Unsolved problems in ML safety (2021)"},{"issue":"6443","key":"1_CR15","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1126\/science.aau6249","volume":"364","author":"M Jaderberg","year":"2019","unstructured":"Jaderberg, M., et al.: Human-level performance in 3D multiplayer games with population-based reinforcement learning. Science 364(6443), 859\u2013865 (2019)","journal-title":"Science"},{"key":"1_CR16","doi-asserted-by":"publisher","first-page":"55","DOI":"10.3233\/KES-2010-0206","volume":"15","author":"GJ Laurent","year":"2011","unstructured":"Laurent, G.J., Matignon, L., Fort-Piat, L., et al.: The world of Independent Learners is not Markovian. J. Knowl.-Based Intell. Eng. Syst. 15, 55\u201364 (2011)","journal-title":"J. Knowl.-Based Intell. Eng. Syst."},{"key":"1_CR17","unstructured":"Leibo, J.Z., Zambaldi, V., Lanctot, M., Marecki, J., Graepel, T.: Multi-agent reinforcement learning in sequential social dilemmas. In: Proceedings of the 16th Conference on Autonomous Agents and Multiagent Systems, pp. 464\u2013473 (2017)"},{"key":"1_CR18","unstructured":"Leike, J., Krueger, D., Everitt, T., Martic, M., Maini, V., Legg, S.: Scalable agent alignment via reward modeling: a research direction (2018)"},{"key":"1_CR19","unstructured":"Leike, J., et al.: AI safety gridworlds. arXiv:1711.09883 (2017)"},{"key":"1_CR20","unstructured":"Liu, S., Lever, G., Merel, J., Tunyasuvunakool, S., Heess, N., Graepel, T.: Emergent coordination through competition. In: 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA (2019)"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Lowd, D., Meek, C.: Adversarial learning. In: Proceedings of the Eleventh ACM SIGKDD International Conference on Knowledge Discovery in Data Mining, pp. 641\u2013647. ACM (2005)","DOI":"10.1145\/1081870.1081950"},{"key":"1_CR22","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in Neural Information Processing Systems, pp. 6379\u20136390 (2017)"},{"key":"1_CR23","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning (2016)"},{"key":"1_CR24","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"key":"1_CR25","unstructured":"Ng, A.Y., Harada, D., Russell, S.J.: Policy invariance under reward transformations: theory and application to reward shaping. In: Proceedings of the Sixteenth International Conference on Machine Learning, ICML 1999, pp. 278\u2013287 (1999)"},{"key":"1_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/978-3-030-55754-6_6","volume-title":"NASA Formal Methods","author":"DT Phan","year":"2020","unstructured":"Phan, D.T., Grosu, R., Jansen, N., Paoletti, N., Smolka, S.A., Stoller, S.D.: Neural simplex architecture. In: Lee, R., Jha, S., Mavridou, A., Giannakopoulou, D. (eds.) NFM 2020. LNCS, vol. 12229, pp. 97\u2013114. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-55754-6_6"},{"key":"1_CR27","unstructured":"Phan, T., Belzner, L., Gabor, T., Schmid, K.: Leveraging statistical multi-agent online planning with emergent value function approximation. In: Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems, AAMAS, pp. 730\u2013738 (2018)"},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Phan, T., Belzner, L., Gabor, T., Sedlmeier, A., Ritz, F., Linnhoff-Popien, C.: Resilient multi-agent reinforcement learning with adversarial value decomposition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, no. 13, pp. 11308\u201311316 (2021)","DOI":"10.1609\/aaai.v35i13.17348"},{"key":"1_CR29","unstructured":"Phan, T., et al.: Learning and testing resilience in cooperative multi-agent systems. In: Proceedings of the 19th Conference on Autonomous Agents and MultiAgent Systems, AAMAS 2020 (2020)"},{"key":"1_CR30","unstructured":"Rashid, T., Samvelyan, M., de Witt, C.S., Farquhar, G., Foerster, J., Whiteson, S.: QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning. In: International Conference on Machine Learning, pp. 4292\u20134301 (2018)"},{"key":"1_CR31","doi-asserted-by":"publisher","unstructured":"Ritz, F., et al.: SAT-MARL: specification aware training in multi-agent reinforcement learning. In: Proceedings of the 13th International Conference on Agents and Artificial Intelligence, Volume 1: ICAART, pp. 28\u201337. SciTePress (2021). https:\/\/doi.org\/10.5220\/0010189500280037","DOI":"10.5220\/0010189500280037"},{"key":"1_CR32","doi-asserted-by":"crossref","unstructured":"Seurin, M., Preux, P., Pietquin, O.: \u201cI\u2019m sorry Dave, I\u2019m afraid I can\u2019t do that\u201d deep q-learning from forbidden action. arXiv:1910.02078 (2019)","DOI":"10.1109\/IJCNN48605.2020.9207496"},{"issue":"6419","key":"1_CR33","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver, D., et al.: A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419), 1140\u20131144 (2018). https:\/\/doi.org\/10.1126\/science.aar6404","journal-title":"Science"},{"key":"1_CR34","unstructured":"Son, K., Kim, D., Kang, W.J., Hostallero, D.E., Yi, Y.: QTRAN: learning to factorize with transformation for cooperative multi-agent reinforcement learning. In: International Conference on Machine Learning, pp. 5887\u20135896 (2019)"},{"key":"1_CR35","unstructured":"Sunehag, P., et al.: Value-decomposition networks for cooperative multi-agent learning based on team reward. In: Proceedings of the 17th International Conference on Autonomous Agents and Multiagent Systems (Extended Abstract), IFAAMAS, pp. 2085\u20132087 (2018)"},{"key":"1_CR36","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. A Bradford Book, Cambridge (2018)"},{"key":"1_CR37","doi-asserted-by":"crossref","unstructured":"Tampuu, A., et al.: Multiagent cooperation and competition with deep reinforcement learning. PLoS ONE 12(4), e0172395 (2017)","DOI":"10.1371\/journal.pone.0172395"},{"key":"1_CR38","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1016\/j.comnet.2015.12.017","volume":"101","author":"S Wang","year":"2016","unstructured":"Wang, S., Wan, J., Zhang, D., Li, D., Zhang, C.: Towards smart factory for industry 4.0: a self-organized multi-agent system with big data based feedback and coordination. Comput. Netw. 101, 158\u2013168 (2016)","journal-title":"Comput. Netw."},{"issue":"3\u20134","key":"1_CR39","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"1_CR40","doi-asserted-by":"crossref","unstructured":"Wolpert, D.H., Tumer, K.: Optimal payoff functions for members of collectives. In: Modeling Complexity in Economic and Social Systems, pp. 355\u2013369. World Scientific (2002)","DOI":"10.1142\/9789812777263_0020"},{"key":"1_CR41","unstructured":"Zahavy, T., Haroush, M., Merlis, N., Mankowitz, D.J., Mannor, S.: Learn what not to learn: action elimination with deep reinforcement learning. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol. 31, pp. 3562\u20133573. Curran Associates, Inc. (2018)"}],"container-title":["Lecture Notes in Computer Science","Agents and Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-10161-8_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,11]],"date-time":"2023-02-11T22:25:01Z","timestamp":1676154301000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-10161-8_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031101601","9783031101618"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-10161-8_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"19 July 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICAART","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Agents and Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 February 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 February 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icaart2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.icaart.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"PRIMORIS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"298","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"72","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"99","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17 selected papers are included in the LNAI proceedings","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}