{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T22:32:47Z","timestamp":1780353167096,"version":"3.54.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030864859","type":"print"},{"value":"9783030864866","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86486-6_10","type":"book-chapter","created":{"date-parts":[[2021,9,9]],"date-time":"2021-09-09T15:25:48Z","timestamp":1631201148000},"page":"157-173","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["CMIX: Deep Multi-agent Reinforcement Learning with Peak and Average Constraints"],"prefix":"10.1007","author":[{"given":"Chenyi","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nan","family":"Geng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vaneet","family":"Aggarwal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tian","family":"Lan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuan","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mingwei","family":"Xu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2021,9,10]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Abedi, A., Ghaderi, M., Williamson, C.: Distributed routing for vehicular ad hoc networks: throughput-delay tradeoff. In: IEEE MASCOTS (2010)","DOI":"10.1109\/MASCOTS.2010.14"},{"key":"10_CR2","first-page":"4714","volume":"20","author":"AO Al-Abbasi","year":"2019","unstructured":"Al-Abbasi, A.O., Ghosh, A., Aggarwal, V.: Deeppool: distributed model-free algorithm for ride-sharing using deep reinforcement learning. IEEE TITS 20, 4714\u20134727 (2019)","journal-title":"IEEE TITS"},{"key":"10_CR3","unstructured":"Bai, Q., Gattami, A., Aggarwal, V.: Provably efficient model-free algorithm for MDPs with peak constraints. arXiv preprint arXiv:2003.05555 (2021)"},{"key":"10_CR4","unstructured":"Chow, Y., Nachum, O., Duenez-Guzman, E., Ghavamzadeh, M.: A Lyapunov-based approach to safe reinforcement learning. In: NeurIPS (2018)"},{"key":"10_CR5","unstructured":"Diddigi, R.B., Danda, S.K.R., Bhatnagar, S., et al.: Actor-critic algorithms for constrained multi-agent reinforcement learning. arXiv preprint arXiv:1905.02907 (2019)"},{"key":"10_CR6","unstructured":"Ding, D., Wei, X., Yang, Z., Wang, Z., Jovanovi\u0107, M.R.: Provably efficient safe exploration via primal-dual policy optimization. In: AISTATS (2021)"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Foerster, J., Farquhar, G., Afouras, T., Nardelli, N., Whiteson, S.: Counterfactual multi-agent policy gradients. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"10_CR8","first-page":"1437","volume":"16","author":"J Garc\u0131a","year":"2015","unstructured":"Garc\u0131a, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. JMLR 16, 1437\u20131480 (2015)","journal-title":"JMLR"},{"key":"10_CR9","unstructured":"Gattami, A.: Reinforcement learning of Markov decision processes with peak constraints. arXiv preprint arXiv:1901.07839 (2019)"},{"key":"10_CR10","unstructured":"Gattami, A., Bai, Q., Aggarwal, V.: Reinforcement learning for multi-objective and constrained Markov decision processes. In: AISTATS (2021)"},{"key":"10_CR11","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"646","DOI":"10.1007\/11871842_63","volume-title":"Machine Learning: ECML 2006","author":"P Geibel","year":"2006","unstructured":"Geibel, P.: Reinforcement learning for MDPs with constraints. In: F\u00fcrnkranz, J., Scheffer, T., Spiliopoulou, M. (eds.) ECML 2006. LNCS (LNAI), vol. 4212, pp. 646\u2013653. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11871842_63"},{"key":"10_CR12","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1613\/jair.1666","volume":"24","author":"P Geibel","year":"2005","unstructured":"Geibel, P., Wysotzki, F.: Risk-sensitive reinforcement learning applied to control under constraints. JAIR 24, 81\u2013108 (2005)","journal-title":"JAIR"},{"key":"10_CR13","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1007\/978-3-319-71682-4_5","volume-title":"Autonomous Agents and Multiagent Systems","author":"JK Gupta","year":"2017","unstructured":"Gupta, J.K., Egorov, M., Kochenderfer, M.: Cooperative multi-agent control using deep reinforcement learning. In: Sukthankar, G., Rodriguez-Aguilar, J.A. (eds.) AAMAS 2017. LNCS (LNAI), vol. 10642, pp. 66\u201383. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-71682-4_5"},{"key":"10_CR14","unstructured":"Ha, D., Dai, A., Le, Q.V.: Hypernetworks. arXiv preprint arXiv:1609.09106 (2016)"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Kassir, S., de Veciana, G., Wang, N., Wang, X., Palacharla, P.: Enhancing cellular performance via vehicular-based opportunistic relaying and load balancing. In: IEEE INFOCOM (2019)","DOI":"10.1109\/INFOCOM.2019.8737611"},{"key":"10_CR16","first-page":"2217","volume":"18","author":"Z Li","year":"2017","unstructured":"Li, Z., Wang, C., Jiang, C.J.: User association for load balancing in vehicular networks: an online reinforcement learning approach. IEEE TITS 18, 2217\u20132228 (2017)","journal-title":"IEEE TITS"},{"key":"10_CR17","unstructured":"Mahajan, A., Rashid, T., Samvelyan, M., Whiteson, S.: Maven: multi-agent variational exploration. arXiv preprint arXiv:1910.07483 (2019)"},{"key":"10_CR18","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"10_CR19","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Nguyen, D.T., Yeoh, W., Lau, H.C., Zilberstein, S., Zhang, C.: Decentralized multi-agent reinforcement learning in average-reward dynamic DCOPs. In: AAAI (2014)","DOI":"10.1609\/aaai.v28i1.8886"},{"key":"10_CR21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8","volume-title":"A Concise Introduction to Decentralized POMDPs","author":"FA Oliehoek","year":"2016","unstructured":"Oliehoek, F.A., Amato, C., et al.: A Concise Introduction to Decentralized POMDPs, vol. 1. Springer, Heidelberg (2016). https:\/\/doi.org\/10.1007\/978-3-319-28929-8"},{"key":"10_CR22","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1007\/s10994-016-5569-5","volume":"105","author":"L Prashanth","year":"2016","unstructured":"Prashanth, L., Ghavamzadeh, M.: Variance-constrained actor-critic algorithms for discounted and average reward MDPs. Mach. Learn. 105, 367\u2013417 (2016). https:\/\/doi.org\/10.1007\/s10994-016-5569-5","journal-title":"Mach. Learn."},{"key":"10_CR23","unstructured":"Rashid, T., Farquhar, G., Peng, B., Whiteson, S.: Weighted QMIX: expanding monotonic value function factorisation for deep multi-agent reinforcement learning. In: NeurIPS (2020)"},{"key":"10_CR24","unstructured":"Rashid, T., Samvelyan, M., Schroeder, C., Farquhar, G., Foerster, J., Whiteson, S.: QMIX: monotonic value function factorisation for deep multi-agent reinforcement learning. In: ICML (2018)"},{"key":"10_CR25","first-page":"4560","volume":"60","author":"H Saleet","year":"2011","unstructured":"Saleet, H., Langar, R., Naik, K., Boutaba, R., Nayak, A., Goel, N.: Intersection-based geographical routing protocol for VANETs: a proposal and analysis. IEEE TVT 60, 4560\u20134574 (2011)","journal-title":"IEEE TVT"},{"key":"10_CR26","unstructured":"Sunehag, P., et al.: Value-decomposition networks for cooperative multi-agent learning based on team reward. In: Springer AAMAS (2018)"},{"key":"10_CR27","doi-asserted-by":"crossref","unstructured":"Tan, M.: Multi-agent reinforcement learning: independent vs. cooperative agents. In: ICML (1993)","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Wang, F., Wang, F., Liu, J., Shea, R., Sun, L.: Intelligent video caching at network edge: a multi-agent deep reinforcement learning approach. In: IEEE INFOCOM (2020)","DOI":"10.1109\/INFOCOM41043.2020.9155373"},{"key":"10_CR29","unstructured":"Yang, Y., et al.: Multi-agent determinantal Q-learning. In: ICML (2020)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86486-6_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T22:08:17Z","timestamp":1757369297000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86486-6_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030864859","9783030864866"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86486-6_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"10 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bilbao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2021.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"869","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"210","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held online due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}