{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T15:51:05Z","timestamp":1742917865125,"version":"3.40.3"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031264115"},{"type":"electronic","value":"9783031264122"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-26412-2_12","type":"book-chapter","created":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T10:03:54Z","timestamp":1678961034000},"page":"183-199","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Constrained Multiagent Reinforcement Learning for\u00a0Large Agent Population"],"prefix":"10.1007","author":[{"given":"Jiajing","family":"Ling","sequence":"first","affiliation":[]},{"given":"Arambam James","family":"Singh","sequence":"additional","affiliation":[]},{"given":"Nguyen Duc","family":"Thien","sequence":"additional","affiliation":[]},{"given":"Akshat","family":"Kumar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,17]]},"reference":[{"key":"12_CR1","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P.: Constrained policy optimization. In: International Conference on Machine Learning, pp. 22\u201331 (2017)"},{"key":"12_CR2","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1613\/jair.1.11418","volume":"64","author":"C Amato","year":"2019","unstructured":"Amato, C., Konidaris, G.D., Kaelbling, L.P., How, J.P.: Modeling and planning with macro-actions in decentralized POMDPs. JAIR 64, 817\u2013859 (2019)","journal-title":"JAIR"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Becker, R., Zilberstein, S., Lesser, V.: Decentralized Markov decision processes with event-driven interactions. In: AAMAS, pp. 302\u2013309 (2004)","DOI":"10.1145\/860575.860583"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Bernstein, D.S., Givan, R., Immerman, N., Zilberstein, S.: The complexity of decentralized control of Markov decision processes. Math. Oper. Res. 27(4), 637\u2013842 (2002)","DOI":"10.1287\/moor.27.4.819.297"},{"key":"12_CR5","unstructured":"Bertsekas, D.P.: Nonlinear programming. Athena Scientific (1999)"},{"key":"12_CR6","unstructured":"Chang, Y.H., Ho, T., Kaelbling, L.P.: All learning is local: multi-agent learning in global reward games. In: NeurIPS, pp. 807\u2013814 (2004)"},{"key":"12_CR7","unstructured":"Diddigi, R.B., Danda, S.K.R., Bhatnagar, S., et al.: Actor-critic algorithms for constrained multi-agent reinforcement learning. arXiv preprint:1905.02907 (2019)"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Foerster, J.N., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. In: AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"12_CR9","unstructured":"Gattami, A., Bai, Q., Agarwal, V.: Reinforcement learning for multi-objective and constrained Markov decision processes. arXiv preprint arXiv:1901.08978 (2019)"},{"key":"12_CR10","unstructured":"H\u00fcttenrauch, M., \u0160o\u0161i\u0107, A., Neumann, G.: Deep reinforcement learning for swarm systems. IN: JMLR (2018)"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Kassir, S., de Veciana, G., Wang, N., Wang, X., Palacharla, P.: Enhancing cellular performance via vehicular-based opportunistic relaying and load balancing. In: INFOCOM IEEE Conference on Computer Communications, pp. 91\u201399 (2019)","DOI":"10.1109\/INFOCOM.2019.8737611"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Liu, C., Geng, N., Aggarwal, V., Lan, T., Yang, Y., Xu, M.: CMIX: deep multi-agent reinforcement learning with peak and average constraints. In: ECML PKDD (2021)","DOI":"10.1007\/978-3-030-86486-6_10"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Liu, Y., Ding, J., Liu, X.: IPO: interior-point policy optimization under constraints. In: AAAI (2020)","DOI":"10.1609\/aaai.v34i04.5932"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Lu, S., Zhang, K., Chen, T., Basar, T., Horesh, L.: Decentralized policy gradient descent ascent for safe multi-agent reinforcement learning. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i10.17062"},{"key":"12_CR15","unstructured":"Meyers, C.A., Schulz, A.S.: The complexity of congestion games. Networks (2012)"},{"key":"12_CR16","unstructured":"Nair, R., Varakantham, P., Tambe, M., Yokoo, M.: Networked distributed POMDPs: a synthesis of distributed constraint optimization and POMDPs. In: AAAI Conference on Artificial Intelligence, pp. 133\u2013139 (2005)"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Nguyen, D.T., Kumar, A., Lau, H.C.: Collective multiagent sequential decision making under uncertainty. In: AAAI (2017)","DOI":"10.1609\/aaai.v31i1.10708"},{"key":"12_CR18","unstructured":"Nguyen, D.T., Kumar, A., Lau, H.C.: Policy gradient with value function approximation for collective multiagent planning. In: NeurIPS, pp. 4322\u20134332 (2017)"},{"key":"12_CR19","unstructured":"Nguyen, D.T., Kumar, A., Lau, H.C.: Credit assignment for collective multiagent RL with global rewards. In: NeurIPS, pp. 8113\u20138124 (2018)"},{"key":"12_CR20","doi-asserted-by":"publisher","unstructured":"Oliehoek, F.A., Amato, C.: A Concise Introduction to Decentralized POMDPs. SpringerBriefs in Intelligent Systems, Springer (2016). https:\/\/doi.org\/10.1007\/978-3-319-28929-8","DOI":"10.1007\/978-3-319-28929-8"},{"key":"12_CR21","unstructured":"Rashid, T., Samvelyan, M., de Witt, C.S., Farquhar, G., Foerster, J.N., Whiteson, S.: Monotonic value function factorisation for deep multi-agent reinforcement learning. JMLR 21, 1\u201351 (2020)"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Singh, A.J.: Multiagent decision making for maritime traffic management. https:\/\/github.com\/rlr-smu\/camarl\/tree\/main\/PG_MTM (2019)","DOI":"10.1609\/aaai.v33i01.33016171"},{"key":"12_CR23","unstructured":"Singh, A.J., Kumar, A., Lau, H.C.: Hierarchical multiagent reinforcement learning for maritime traffic management. In: Proceedings of the 19th AAMAS (2020)"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Singh, A.J., Kumar, A., Lau, H.C.: Learning and exploiting shaped reward models for large scale multiagent RL. In: ICAPS (2021)","DOI":"10.1609\/icaps.v31i1.16007"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Singh, A.J., Nguyen, D.T., Kumar, A., Lau, H.C.: Multiagent decision making for maritime traffic management. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33016171"},{"key":"12_CR26","unstructured":"Subramanian, J., Mahajan, A.: Reinforcement learning in stationary mean-field games. In: AAMAS, pp. 251\u2013259 (2019)"},{"key":"12_CR27","unstructured":"Subramanian, S.G., Poupart, P., Taylor, M.E., Hegde, N.: Multi type mean field reinforcement learning. In: AAMAS (2020)"},{"key":"12_CR28","unstructured":"Subramanian, S.G., Taylor, M.E., Crowley, M., Poupart, P.: Partially observable mean field reinforcement learning. In: AAMAS, pp. 537\u2013545 (2021)"},{"key":"12_CR29","unstructured":"Sutton, R.S., McAllester, D., Singh, S., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: NeurIPS (1999)"},{"key":"12_CR30","unstructured":"Tessler, C., Mankowitz, D.J., Mannor, S.: Reward constrained policy optimization. In: International Conference on Learning Representations (2018)"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Tumer, K., Agogino, A.: Distributed agent-based air traffic flow management. In: AAMAS, pp. 1\u20138 (2007)","DOI":"10.1145\/1329125.1329434"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Varakantham, P., Adulyasak, Y., Jaillet, P.: Decentralized stochastic planning with anonymity in interactions. In: AAAI, pp. 2505\u20132511 (2014)","DOI":"10.1609\/aaai.v28i1.9069"},{"key":"12_CR33","doi-asserted-by":"crossref","unstructured":"Verma, T., Varakantham, P., Lau, H.C.: Entropy based independent learning in anonymous multi-agent settings. In: ICAPS, pp. 655\u2013663 (2019)","DOI":"10.1609\/icaps.v29i1.3533"},{"key":"12_CR34","unstructured":"Wang, J., Ren, Z., Liu, T., Yu, Y., Zhang, C.: QPLEX: duplex dueling multi-agent q-learning. In: ICLR (2021)"},{"key":"12_CR35","unstructured":"Wang, W., Wu, G., Wu, W., Jiang, Y., An, B.: Online collective multiagent planning by offline policy reuse with applications to city-scale mobility-on-demand systems. In: AAMAS (2022)"},{"key":"12_CR36","unstructured":"Yang, Y., Luo, R., Li, M., Zhou, M., Zhang, W., Wang, J.: Mean field multi-agent reinforcement learning. In: ICML, vol. 80, pp. 5567\u20135576 (2018)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-26412-2_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,16]],"date-time":"2023-03-16T10:07:22Z","timestamp":1678961242000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-26412-2_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031264115","9783031264122"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-26412-2_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Grenoble","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2022.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1060","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"236","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"22% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17 demo track papers have been accepted from 28 submissions","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}