{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:12:24Z","timestamp":1775578344956,"version":"3.50.1"},"publisher-location":"Cham","reference-count":48,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031264115","type":"print"},{"value":"9783031264122","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-26412-2_16","type":"book-chapter","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T10:03:54Z","timestamp":1678961034000},"page":"251-266","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["MAVIPER: Learning Decision Tree Policies for\u00a0Interpretable Multi-agent Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Stephanie","family":"Milani","sequence":"first","affiliation":[]},{"given":"Zhicheng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Nicholay","family":"Topin","sequence":"additional","affiliation":[]},{"given":"Zheyuan Ryan","family":"Shi","sequence":"additional","affiliation":[]},{"given":"Charles","family":"Kamhoua","sequence":"additional","affiliation":[]},{"given":"Evangelos E.","family":"Papalexakis","sequence":"additional","affiliation":[]},{"given":"Fei","family":"Fang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,17]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.: Apprenticeship learning via inverse reinforcement learning. In: ICML (2004)","DOI":"10.1145\/1015330.1015430"},{"key":"16_CR2","unstructured":"Bastani, O., et al.: Verifiable reinforcement learning via policy extraction. In: NeurIPS (2018)"},{"key":"16_CR3","unstructured":"Berner, C., et al.: Dota 2 with large scale deep reinforcement learning. arXiv preprint 1912.06680 (2019)"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"Bhalla, S., et al.: Deep multi agent reinforcement learning for autonomous driving. In: Canadian Conference Artificial Intelligent (2020)","DOI":"10.1007\/978-3-030-47358-7_7"},{"key":"16_CR5","unstructured":"Brittain, M., Wei, P.: Autonomous air traffic controller: a deep multi-agent reinforcement learning approach. arXiv preprint arXiv:1905.01303 (2019)"},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Bucilu\u01ce, C., et al.: Model compression. In: KDD (2006)","DOI":"10.1145\/1150402.1150464"},{"key":"16_CR7","unstructured":"Chen, Z., et al.: Relace: Reinforcement learning agent for counterfactual explanations of arbitrary predictive models. arXiv preprint arXiv:2110.11960 (2021)"},{"key":"16_CR8","doi-asserted-by":"crossref","unstructured":"Degris, T., et al.: Learning the structure of factored Markov decision processes in reinforcement learning problems. In: ICML (2006)","DOI":"10.1145\/1143844.1143877"},{"key":"16_CR9","unstructured":"Ernst, D., et al.: Tree-based batch mode reinforcement learning. JMLR 6 (2005)"},{"key":"16_CR10","unstructured":"Foerster, J., et al.: Stabilising experience replay for deep multi-agent reinforcement learning. In: ICML (2017)"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Foerster, J., et al.: Counterfactual multi-agent policy gradients. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Heuillet, A., et al.: Collective explainable ai: explaining cooperative strategies and agent contribution in multiagent reinforcement learning with shapley values. IEEE Comput. Intell. Mag. 17, 59\u201371 (2022)","DOI":"10.1109\/MCI.2021.3129959"},{"key":"16_CR13","unstructured":"Hinton, G., et al.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"16_CR14","unstructured":"Iqbal, S., Sha, F.: Actor-attention-critic for multi-agent reinforcement learning. In: ICML (2019)"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Kazhdan, D., et al.: Marleme: a multi-agent reinforcement learning model extraction library. In: IJCNN (2020)","DOI":"10.1109\/IJCNN48605.2020.9207564"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Li, S., et al.: Robust multi-agent reinforcement learning via minimax deep deterministic policy gradient. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33014213"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Li, W., et al.: Sparsemaac: sparse attention for multi-agent reinforcement learning. In: International Conference on Database Systems for Advanced Applications (2019)","DOI":"10.1007\/978-3-030-18590-9_7"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Lipton, Z.: The mythos of model interpretability. ACM Queue 16(3) (2018)","DOI":"10.1145\/3236386.3241340"},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"Littman, M.: Markov games as a framework for multi-agent reinforcement learning. In: Mach. Learning (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"16_CR20","unstructured":"Lowe, R., et al.: Multi-agent actor-critic for mixed cooperative-competitive environments. arXiv preprint arXiv:1706.02275 (2017)"},{"key":"16_CR21","unstructured":"Luss, R., et al.: Local explanations for reinforcement learning. arXiv preprint arXiv:2202.03597 (2022)"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Malialis, K., Kudenko, D.: Distributed response to network intrusions using multiagent reinforcement learning. Eng. Appl. Artif. Intell. 40, 270\u2013284 (2015)","DOI":"10.1016\/j.engappai.2015.01.013"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"Matignon, L., et al.: Independent reinforcement learners in cooperative Markov games: a survey regarding coordination problems. Knowl. Eng. Rev. 27(1), 1\u201331 (2012)","DOI":"10.1017\/S0269888912000057"},{"key":"16_CR24","unstructured":"McCallum, R.: Reinforcement learning with selective perception and hidden state. Ph.D. thesis, Univ. Rochester, Dept. of Comp. Sci. (1997)"},{"key":"16_CR25","doi-asserted-by":"crossref","unstructured":"Meng, Z., et al.: Interpreting deep learning-based networking systems. In: Proceedings of the Annual Conference of the ACM Special Interest Group on Data Communication on the Applications, Technologies, Architectures, and Protocols for Computer Communication (2020)","DOI":"10.1145\/3387514.3405859"},{"key":"16_CR26","unstructured":"Milani, S., et al.: A survey of explainable reinforcement learning. arXiv preprint arXiv:2202.08434 (2022)"},{"key":"16_CR27","unstructured":"Mohanty, S., et al.: Flatland-rl: multi-agent reinforcement learning on trains. arXiv preprint arXiv:2012.05893 (2020)"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"Molnar, C.: Interpretable Machine Learning (2019)","DOI":"10.21105\/joss.00786"},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"Motokawa, Y., Sugawara, T.: MAT-DQN: toward interpretable multi-agent deep reinforcement learning for coordinated activities. In: ICANN (2021)","DOI":"10.1007\/978-3-030-86380-7_45"},{"key":"16_CR30","doi-asserted-by":"crossref","unstructured":"Oliehoek, F., et al.: Optimal and approximate q-value functions for decentralized pomdps. JAIR 32, 289\u2013353 (2008)","DOI":"10.1613\/jair.2447"},{"key":"16_CR31","unstructured":"Paszke, A., et al.: Automatic differentiation in pytorch (2017)"},{"key":"16_CR32","unstructured":"Pyeatt, L.: Reinforcement learning with decision trees. In: Appl. Informatics (2003)"},{"key":"16_CR33","unstructured":"Pyeatt, L., Howe, A.: Decision tree function approximation in reinforcement learning. In: Int. Symp. on Adaptive Syst.: Evol. Comput. and Prob. Graphical Models (2001)"},{"key":"16_CR34","doi-asserted-by":"crossref","unstructured":"Quinlan, J.: Induction of decision trees. Mach. Learn. 1, 81\u2013106 (1986)","DOI":"10.1007\/BF00116251"},{"key":"16_CR35","unstructured":"Rashid, T., et al.: Qmix: monotonic value function factorisation for deep multi-agent reinforcement learning. In: ICML (2018)"},{"key":"16_CR36","unstructured":"Ross, S., et al.: A reduction of imitation learning and structured prediction to no-regret online learning. In: AISTATS (2011)"},{"key":"16_CR37","unstructured":"Roth, A., et al.: Conservative q-improvement: reinforcement learning for an interpretable decision-tree policy. arXiv preprint arXiv:1907.01180 (2019)"},{"key":"16_CR38","doi-asserted-by":"crossref","unstructured":"Shapley, L.: Stochastic games. PNAS 39(10), 1095\u20131100 (1953)","DOI":"10.1073\/pnas.39.10.1095"},{"key":"16_CR39","unstructured":"Son, K., et al.: Qtran: Learning to factorize with transformation for cooperative multi-agent reinforcement learning. arXiv preprint arXiv:1905.05408 (2019)"},{"key":"16_CR40","unstructured":"Strehl, A., et al.: Efficient structure learning in factored-state mdps. In: AAAI (2007)"},{"key":"16_CR41","unstructured":"Sunehag, P., et al.: Value-decomposition networks for cooperative multi-agent learning. arXiv preprint arXiv:1706.05296 (2017)"},{"key":"16_CR42","doi-asserted-by":"crossref","unstructured":"Topin, N., et al.: Iterative bounding mdps: learning interpretable policies via non-interpretable methods. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i11.17192"},{"key":"16_CR43","doi-asserted-by":"crossref","unstructured":"Tuyls, K., et al.: Reinforcement learning in large state spaces. In: Robot Soccer World Cup (2002)","DOI":"10.1007\/978-3-540-45135-8_27"},{"key":"16_CR44","doi-asserted-by":"crossref","unstructured":"Uther, W., Veloso, M.: The lumberjack algorithm for learning linked decision forests. In: International Symposium on Abstraction, Reformulation, and Approximation (2000)","DOI":"10.1007\/3-540-44914-0_13"},{"key":"16_CR45","unstructured":"Vasic, M., et al.: Mo\u00ebt: Interpretable and verifiable reinforcement learning via mixture of expert trees. arXiv preprint arXiv:1906.06717 (2019)"},{"key":"16_CR46","unstructured":"Wang, T., et al.: Dataset distillation. arXiv preprint arXiv:1811.10959 (2018)"},{"key":"16_CR47","unstructured":"Wang, X., et al.: Explanation of reinforcement learning model in dynamic multi-agent system. arXiv preprint arXiv:2008.01508 (2020)"},{"key":"16_CR48","unstructured":"Yu, C., et al.: The surprising effectiveness of mappo in cooperative, multi-agent games. arXiv preprint arXiv:2103.01955 (2021)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-26412-2_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,16]],"date-time":"2024-10-16T14:46:36Z","timestamp":1729089996000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-26412-2_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031264115","9783031264122"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-26412-2_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Grenoble","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2022.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1060","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"236","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17 demo track papers have been accepted from 28 submissions","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}