{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T21:50:49Z","timestamp":1757627449921,"version":"3.44.0"},"publisher-location":"Cham","reference-count":96,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031733536"},{"type":"electronic","value":"9783031733543"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73354-3_13","type":"book-chapter","created":{"date-parts":[[2025,8,18]],"date-time":"2025-08-18T12:06:35Z","timestamp":1755518795000},"page":"307-340","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Exploring the Efficiency vs. Fairness Behavioural Spectrum in Multi-Agent Deep Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Margarida","family":"Silva","sequence":"first","affiliation":[]},{"given":"Zafeiris","family":"Kokkinogenis","sequence":"additional","affiliation":[]},{"given":"Jeremy","family":"Pitt","sequence":"additional","affiliation":[]},{"given":"Rosaldo J. F.","family":"Rossetti","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,24]]},"reference":[{"issue":"1","key":"13_CR1","first-page":"71","volume":"14","author":"BH Abed-Alguni","year":"2016","unstructured":"B.H. Abed-Alguni, D.J. Paul, S.K. Chalup, F.A. Henskens, A comparison study of cooperative q-learning algorithms for independent learners. Int. J. Artif. Intell. 14(1), 71\u201393 (2016)","journal-title":"Int. J. Artif. Intell."},{"key":"13_CR2","doi-asserted-by":"publisher","first-page":"543","DOI":"10.1016\/j.endm.2013.05.136","volume":"41","author":"E Amaldi","year":"2013","unstructured":"E. Amaldi, S. Coniglio, L.G. Gianoli, C.U. Ileri, On single-path network routing subject to max-min fair flow allocation. Electron Notes Discrete Math. 41, 543\u2013550 (2013)","journal-title":"Electron Notes Discrete Math."},{"key":"13_CR3","unstructured":"W. Bao, Fairness in Multi-agent Reinforcement Learning for Stock Trading. arXiv (2019). http:\/\/arxiv.org\/abs\/2001.00918"},{"issue":"3731","key":"13_CR4","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1126\/science.153.3731.34","volume":"153","author":"R Bellman","year":"1966","unstructured":"R. Bellman, Dynamic programming. Science 153(3731), 34\u201337 (1966)","journal-title":"Science"},{"issue":"1","key":"13_CR5","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1287\/opre.1100.0865","volume":"59","author":"D Bertsimas","year":"2011","unstructured":"D. Bertsimas, V.F. Farias, N. Trichakis, The price of fairness. Oper. Res. 59(1), 17\u201331 (2011)","journal-title":"Oper. Res."},{"key":"13_CR6","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511598975","volume-title":"Fair Division: From Cake-Cutting to Dispute Resolution","author":"SJ Brams","year":"1996","unstructured":"S.J. Brams, S.J. Brams, A.D. Taylor, Fair Division: From Cake-Cutting to Dispute Resolution (Cambridge University Press, Cambridge, 1996)"},{"key":"13_CR7","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1016\/j.neucom.2017.02.096","volume":"263","author":"T Brys","year":"2017","unstructured":"T. Brys, A. Harutyunyan, P. Vrancx, A. Now\u00e9, M.E. Taylor, Multi-objectivization and ensembles of shapings in reinforcement learning. Neurocomputing 263, 48\u201359 (2017)","journal-title":"Neurocomputing"},{"key":"13_CR8","unstructured":"R. Busa-Fekete, B. Sz\u00f6r\u00e9nyi, P. Weng, S. Mannor, Multi-objective bandits: optimizing the generalized Gini index, in International Conference on Machine Learning. PMLR (2017), pp. 625\u2013634"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"L. Busoniu, R. Babuska, B. De Schutter, A comprehensive survey of multiagent reinforcement learning. IEEE Trans. Syst. Man Cybern. C (Appl. Rev.) 38(2), 156\u2013172 (2008)","DOI":"10.1109\/TSMCC.2007.913919"},{"key":"13_CR10","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1007\/978-3-642-14435-6_7","volume-title":"Multi-agent reinforcement learning: an overview, in Innovations in Multi-Agent Systems and Applications-1","author":"L Bu\u015foniu","year":"2010","unstructured":"L. Bu\u015foniu, R. Babu\u0161ka, B. De Schutter, Multi-agent reinforcement learning: an overview, in Innovations in Multi-Agent Systems and Applications-1 (Springer, Berlin, 2010), pp. 183\u2013221"},{"key":"13_CR11","unstructured":"J. Castellini, S. Devlin, F.A. Oliehoek, R. Savani, Difference Rewards Policy Gradients (2020). http:\/\/arxiv.org\/abs\/2012.11258"},{"key":"13_CR12","unstructured":"Y.H. Chang, T. Ho, L.P. Kaelbling, All learning is local: multi-agent learning in global reward games, in Advances in Neural Information Processing Systems 16 (NIPS 2003), 2003, ed. by S. Thrun, L. Saul, B. Sch\u00f6lkopf (2004). https:\/\/papers.nips.cc\/paper_files\/paper\/2003"},{"issue":"2","key":"13_CR13","doi-asserted-by":"publisher","first-page":"813","DOI":"10.1109\/JIOT.2020.3008299","volume":"8","author":"D Chen","year":"2021","unstructured":"D. Chen, Q. Qi, Z. Zhuang, J. Wang, J. Liao, Z. Han, Mean field deep reinforcement learning for fair and efficient UAV control. IEEE Internet Things J. 8(2), 813\u2013828 (2021). https:\/\/doi.org\/10.1109\/JIOT.2020.3008299","journal-title":"IEEE Internet Things J."},{"issue":"1","key":"13_CR14","first-page":"3","volume":"30","author":"Y Chevaleyre","year":"2006","unstructured":"Y. Chevaleyre, P.E. Dunne, U. Endriss, J. Lang, M. Lema\u00eetre, N. Maudet, J. Padget, S. Phelps, J.A. Rodr\u00edguez-Aguilar, P. Sousa, Issues in multiagent resource allocation. Informatica 30(1), 3\u201331 (2006)","journal-title":"Informatica"},{"key":"13_CR15","unstructured":"F. Chierichetti, R. Kumar, S. Lattanzi, S. Vassilvitskii, Fair clustering through fairlets. arXiv preprint arXiv:1802.05733 (2018)"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"H. Claure, Y. Chen, J. Modi, M. Jung, S. Nikolaidis, Multi-armed bandits with fairness constraints for distributing resources to human teammates, in ACM\/IEEE International Conference on Human-Robot Interaction (2019), pp. 299\u2013308. http:\/\/arxiv.org\/abs\/1907.00313","DOI":"10.1145\/3319502.3374806"},{"key":"13_CR17","unstructured":"H. Claure, Y. Chen, J. Modi, M. Jung, S. Nikolaidis, Reinforcement learning with fairness constraints for resource distribution in human-robot teams. arXiv preprint arXiv:1907.00313 (2019)"},{"issue":"2","key":"13_CR18","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1017\/S026988890800132X","volume":"23","author":"S De Jong","year":"2008","unstructured":"S. De Jong, K. Tuyls, K. Verbeeck, Fairness in multi-agent systems. Knowl. Eng. Rev. 23(2), 153\u2013180 (2008)","journal-title":"Knowl. Eng. Rev."},{"key":"13_CR19","doi-asserted-by":"publisher","DOI":"10.1093\/oso\/9780198772101.001.0001","volume-title":"Optimization in Economic Theory","author":"AK Dixit","year":"1990","unstructured":"A.K. Dixit, J.J. Sherrerd, et al., Optimization in Economic Theory (Oxford University Press on Demand, Oxford, 1990)"},{"issue":"1","key":"13_CR20","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1287\/opre.50.1.48.17791","volume":"50","author":"S Dreyfus","year":"2002","unstructured":"S. Dreyfus, Richard bellman on the birth of dynamic programming. Oper. Res. 50(1), 48\u201351 (2002)","journal-title":"Oper. Res."},{"key":"13_CR21","unstructured":"G. Dulac-Arnold, D. Mankowitz, T. Hester, Challenges of Real-World Reinforcement Learning. arXiv (2019). http:\/\/arxiv.org\/abs\/1904.12901"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"C. Dwork, M. Hardt, T. Pitassi, O. Reingold, R. Zemel, Fairness through awareness, in Proceedings of the 3rd Innovations in Theoretical Computer Science Conference (2012), pp. 214\u2013226","DOI":"10.1145\/2090236.2090255"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"S. Elmalaki, Fair-iot: fairness-aware human-in-the-loop reinforcement learning for harnessing human variability in personalized IoT, in Proceedings of the International Conference on Internet-of-Things Design and Implementation (2021), pp. 119\u2013132","DOI":"10.1145\/3450268.3453525"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"J. Foerster, G. Farquhar, T. Afouras, N. Nardelli, S. Whiteson, Counterfactual multi-agent policy gradients, in 32nd AAAI Conference on Artificial Intelligence, AAAI 2018 (2017), pp. 2974\u20132982. http:\/\/arxiv.org\/abs\/1705.08926","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"13_CR25","unstructured":"T. Haarnoja, A. Zhou, P. Abbeel, S. Levine, Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor, in International Conference on Machine Learning. PMLR (2018), pp. 1861\u20131870"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"J. Hao, H.f. Leung, Fairness in Cooperative Multiagent Systems (2016), pp. 27\u201370. https:\/\/doi.org\/10.1007\/978-3-662-49470-7_3","DOI":"10.1007\/978-3-662-49470-7_3"},{"key":"13_CR27","unstructured":"M.J. Hausknecht, Cooperation and communication in multiagent deep reinforcement learning. Ph.D. thesis (2016)"},{"key":"13_CR28","unstructured":"M. Hausknecht, P. Stone, Deep recurrent q-learning for partially observable MDPs, in AAAI Fall Symposium. Technical Report, vol. FS-15-06. AI Access Foundation (2015), pp. 29\u201337. www.aaai.org"},{"issue":"1","key":"13_CR29","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1109\/SURV.2013.050113.00015","volume":"16","author":"S Huaizhou","year":"2013","unstructured":"S. Huaizhou, R.V. Prasad, E. Onur, I. Niemegeers, Fairness in wireless networks: issues, measures and challenges. IEEE Commun. Surv. Tutor. 16(1), 5\u201324 (2013)","journal-title":"IEEE Commun. Surv. Tutor."},{"key":"13_CR30","unstructured":"E. Hughes, J.Z. Leibo, M. Phillips, K. Tuyls, E. Due\u00f1ez-Guzman, A.G. Casta\u00f1eda, I. Dunning, T. Zhu, K. McKee, R. Koster, et al., Inequity aversion improves cooperation in intertemporal social dilemmas, in Proceedings of the 32nd International Conference on Neural Information Processing Systems (2018), pp. 3330\u20133340"},{"issue":"2","key":"13_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3054912","volume":"50","author":"A Hussein","year":"2017","unstructured":"A. Hussein, M.M. Gaber, E. Elyan, C. Jayne, Imitation learning: a survey of learning methods. ACM Comput. Surv. 50(2), 1\u201335 (2017)","journal-title":"ACM Comput. Surv."},{"key":"13_CR32","unstructured":"S. Iqbal, F. Sha, Actor-attention-critic for multi-agent reinforcement learning, in International Conference on Machine Learning. PMLR (2019), pp. 2961\u20132970"},{"key":"13_CR33","unstructured":"S. Jabbari, M. Joseph, M. Kearns, J. Morgenstern, A. Roth, Fairness in reinforcement learning, in International Conference on Machine Learning. PMLR (2017), pp. 1617\u20131626"},{"key":"13_CR34","volume-title":"A quantitative measure of fairness and discrimination","author":"RK Jain","year":"1984","unstructured":"R.K. Jain, D.M.W. Chiu, W.R. Hawe, et al., A quantitative measure of fairness and discrimination. Eastern Research Laboratory, Digital Equipment Corporation, Hudson (1984)"},{"key":"13_CR35","first-page":"13854","volume":"32","author":"J Jiang","year":"2019","unstructured":"J. Jiang, Z. Lu, Learning fairness in multi-agent systems. Adv. Neural Inf. Process. Syst. 32, 13854\u201313865 (2019)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"13_CR36","unstructured":"J. Jiang, C. Dun, T. Huang, Z. Lu, Graph convolutional reinforcement learning, in International Conference on Learning Representations (2019)"},{"key":"13_CR37","unstructured":"S. Jong, K. Tuyls, K. Verbeeck, N. Roos, Considerations for fairness in multi-agent systems. Undefined (2007)"},{"key":"13_CR38","unstructured":"M. Joseph, M. Kearns, J. Morgenstern, A. Roth, Fairness in learning: classic and contextual bandits. arXiv preprint arXiv:1605.07139 (2016)"},{"issue":"1\u20132","key":"13_CR39","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/S0004-3702(98)00023-X","volume":"101","author":"LP Kaelbling","year":"1998","unstructured":"L.P. Kaelbling, M.L. Littman, A.R. Cassandra, Planning and acting in partially observable stochastic domains. Artif. Intell. 101(1\u20132), 99\u2013134 (1998). https:\/\/doi.org\/10.1016\/s0004-3702(98)00023-x","journal-title":"Artif. Intell."},{"key":"13_CR40","unstructured":"D.P. Kingma, J. Ba, Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"13_CR41","unstructured":"V.R. Konda, J.N. Tsitsiklis, Actor-critic algorithms, in Advances in Neural Information Processing Systems (2000), pp. 1008\u20131014"},{"key":"13_CR42","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1016\/j.neucom.2016.01.031","volume":"190","author":"L Kraemer","year":"2016","unstructured":"L. Kraemer, B. Banerjee, Multi-agent reinforcement learning as a rehearsal for decentralized planning. Neurocomputing 190, 82\u201394 (2016). https:\/\/doi.org\/10.1016\/j.neucom.2016.01.031","journal-title":"Neurocomputing"},{"key":"13_CR43","unstructured":"M.J. Kusner, J.R. Loftus, C. Russell, R. Silva, Counterfactual fairness. arXiv preprint arXiv:1703.06856 (2017)"},{"issue":"1","key":"13_CR44","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1002\/job.128","volume":"23","author":"K Lamertz","year":"2002","unstructured":"K. Lamertz, The social construction of fairness: social influence and sense making in organizations. J. Organ. Behav. 23(1), 19\u201337 (2002)","journal-title":"J. Organ. Behav."},{"key":"13_CR45","unstructured":"T.P. Lillicrap, J.J. Hunt, A. Pritzel, N. Heess, T. Erez, Y. Tassa, D. Silver, D. Wierstra, Continuous control with deep reinforcement learning, in 4th International Conference on Learning Representations, ICLR 2016 - Conference Track Proceedings. International Conference on Learning Representations, ICLR (2016). https:\/\/goo.gl\/J4PIAz"},{"key":"13_CR46","doi-asserted-by":"publisher","unstructured":"S. Liu, M. Araujo, E. Brunskill, R. Rossetti, J. Barros, R. Krishnan, Understanding sequential decisions via inverse reinforcement learning, in 2013 IEEE 14th International Conference on Mobile Data Management, vol. 1 (2013), pp. 177\u2013186. https:\/\/doi.org\/10.1109\/MDM.2013.28","DOI":"10.1109\/MDM.2013.28"},{"key":"13_CR47","unstructured":"R. Lowe, Y. Wu, A. Tamar, J. Harb, P. Abbeel, I. Mordatch, Multi-agent actor-critic for mixed cooperative-competitive environments. Adv. Neural Inf. Process. Syst. 2017-December, 6380\u20136391 (2017). http:\/\/arxiv.org\/abs\/1706.02275"},{"key":"13_CR48","doi-asserted-by":"publisher","DOI":"10.1002\/9781118449189","volume-title":"Equitable Resource Allocation: Models, Algorithms and Applications","author":"H Luss","year":"2012","unstructured":"H. Luss, Equitable Resource Allocation: Models, Algorithms and Applications, vol. 101. (John Wiley & Sons, Hoboken, 2012)"},{"key":"13_CR49","unstructured":"L. Matignon, L. Jeanpierre, A.I. Mouaddib, Coordinated multi-robot exploration under communication constraints using decentralized Markov decision processes, in Twenty-Sixth AAAI Conference on Artificial Intelligence (2012)"},{"key":"13_CR50","unstructured":"V. Mnih, A.P. Badia, M. Mirza, A. Graves, T.P. Lillicrap, T. Harley, D. Silver, K. Kavukcuoglu, Asynchronous methods for deep reinforcement learning, in 33rd International Conference on Machine Learning, ICML 2016, vol. 4 (2016), pp. 2850\u20132869. http:\/\/arxiv.org\/abs\/1602.01783"},{"issue":"5","key":"13_CR51","doi-asserted-by":"publisher","first-page":"556","DOI":"10.1109\/90.879343","volume":"8","author":"J Mo","year":"2000","unstructured":"J. Mo, J. Walrand, Fair end-to-end window-based congestion control. IEEE\/ACM Trans. Netw. 8(5), 556\u2013567 (2000)","journal-title":"IEEE\/ACM Trans. Netw."},{"key":"13_CR52","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/2954.001.0001","volume-title":"Fair Division and Collective Welfare","author":"H Moulin","year":"2003","unstructured":"H. Moulin, Fair Division and Collective Welfare (MIT Press, Cambridge, 2003)"},{"key":"13_CR53","doi-asserted-by":"crossref","unstructured":"A. Neidhardt, H. Luss, K. Krishnan, Data fusion and optimal placement of fixed and mobile sensors, in 2008 IEEE Sensors Applications Symposium. IEEE (2008), pp. 128\u2013133","DOI":"10.1109\/SAS13374.2008.4472957"},{"key":"13_CR54","unstructured":"A.Y. Ng, S. Russell, Algorithms for inverse reinforcement learning, in Proc. 17th International Conf. on Machine Learning. Morgan Kaufmann (2000), pp. 663\u2013670"},{"issue":"05","key":"13_CR55","doi-asserted-by":"publisher","first-page":"1021","DOI":"10.1142\/S0219622013400075","volume":"12","author":"W Ogryczak","year":"2013","unstructured":"W. Ogryczak, P. Perny, P. Weng, A compromise programming approach to multiobjective markov decision processes. Int. J. Inf. Technol. Decis. Mak. 12(05), 1021\u20131053 (2013)","journal-title":"Int. J. Inf. Technol. Decis. Mak."},{"key":"13_CR56","doi-asserted-by":"crossref","unstructured":"W. Ogryczak, H. Luss, M. Pi\u00f3ro, D. Nace, A. Tomaszewski, Fair optimization and networks: a survey. J. Appl. Math. 2014 (2014)","DOI":"10.1155\/2014\/612018"},{"key":"13_CR57","doi-asserted-by":"crossref","unstructured":"F.A. Oliehoek, C. Amato, A Concise Introduction to Decentralized POMDPs. SpringerBriefs in Intelligent Systems. Springer International Publishing, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-28929-8. http:\/\/link.springer.com\/10.1007\/978-3-319-28929-8","DOI":"10.1007\/978-3-319-28929-8"},{"key":"13_CR58","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1613\/jair.2447","volume":"32","author":"FA Oliehoek","year":"2008","unstructured":"F.A. Oliehoek, M.T. Spaan, N. Vlassis, Optimal and approximate Q-value functions for decentralized POMDPs. J. Artif. Intell. Res. 32, 289\u2013353 (2008). https:\/\/doi.org\/10.1613\/jair.2447","journal-title":"J. Artif. Intell. Res."},{"key":"13_CR59","unstructured":"A. OroojlooyJadid, D. Hajinezhad, A Review of Cooperative Multi-Agent Deep Reinforcement Learning. arXiv (2019). http:\/\/arxiv.org\/abs\/1908.03963"},{"key":"13_CR60","doi-asserted-by":"crossref","unstructured":"M. Pi\u00f3ro, G. Malicsk\u00f3, G. Fodor, Optimal link capacity dimensioning in proportionally fair networks, in International Conference on Research in Networking (Springer, 2002), pp. 277\u2013288","DOI":"10.1007\/3-540-47906-6_22"},{"key":"13_CR61","doi-asserted-by":"publisher","unstructured":"J. Pitt, Interactional justice and self-governance of open self-organising systems, in Proceedings - 11th IEEE International Conference on Self-Adaptive and Self-Organizing Systems, SASO 2017 (Institute of Electrical and Electronics Engineers Inc., 2017), pp. 31\u201340. https:\/\/doi.org\/10.1109\/SASO.2017.12","DOI":"10.1109\/SASO.2017.12"},{"key":"13_CR62","doi-asserted-by":"crossref","unstructured":"J. Pitt, D. Busquets, S. Macbeth, Distributive justice for self-organised common-pool resource management. ACM Trans. Auton. Adapt. Syst. 9(3), 1\u201339 (2014). https:\/\/doi.org\/10.1145\/2629567. https:\/\/dl.acm.org\/doi\/10.1145\/2629567","DOI":"10.1145\/2629567"},{"key":"13_CR63","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"ML Puterman","year":"2014","unstructured":"M.L. Puterman, Markov Decision Processes: Discrete Stochastic Dynamic Programming (John Wiley & Sons, Hoboken, 2014)"},{"key":"13_CR64","unstructured":"T. Rashid, M. Samvelyan, C.S. de Witt, G. Farquhar, J. Foerster, S. Whiteson, QMIX: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning (2018). http:\/\/arxiv.org\/abs\/1803.11485"},{"key":"13_CR65","doi-asserted-by":"publisher","DOI":"10.4159\/9780674042605","volume-title":"A Theory of Justice","author":"J Rawls","year":"1971","unstructured":"J. Rawls, A Theory of Justice (Harvard University Press, Cambridge, 1971). http:\/\/www.jstor.org\/stable\/j.ctvjf9z6v"},{"key":"13_CR66","unstructured":"H. Ryu, H. Shin, J. Park, Multi-Agent Actor-Critic with Hierarchical Graph Attention Network. arXiv (2019). http:\/\/arxiv.org\/abs\/1909.12557"},{"key":"13_CR67","doi-asserted-by":"crossref","unstructured":"N.A. Saxena, K. Huang, E. DeFilippis, G. Radanovic, D.C. Parkes, Y. Liu, How do fairness definitions fare? Examining public attitudes towards algorithmic definitions of fairness, in Proceedings of the 2019 AAAI\/ACM Conference on AI, Ethics, and Society (2019), pp. 99\u2013106","DOI":"10.1145\/3306618.3314248"},{"key":"13_CR68","unstructured":"J. Schulman, S. Levine, P. Abbeel, M. Jordan, P. Moritz, Trust region policy optimization, in International Conference on Machine Learning. PMLR (2015), pp. 1889\u20131897"},{"key":"13_CR69","unstructured":"J. Schulman, P. Moritz, S. Levine, M. Jordan, P. Abbeel, High-dimensional continuous control using generalized advantage estimation. arXiv preprint arXiv:1506.02438 (2015)"},{"key":"13_CR70","unstructured":"J. Schulman, F. Wolski, P. Dhariwal, A. Radford, O. Klimov, Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"13_CR71","unstructured":"H. Shen, K. Zhang, M. Hong, T. Chen, Asynchronous Advantage Actor Critic: Non-asymptotic Analysis and Linear Speedup (2020). http:\/\/arxiv.org\/abs\/2012.15511"},{"key":"13_CR72","doi-asserted-by":"publisher","unstructured":"H. Shi, R.V. Prasad, E. Onur, I.G.M.M. Niemegeers, Fairness in wireless networks: issues, measures and challenges. IEEE Commun. Surv. Tutor. 16(1), 5\u201324. First Quarter 2014, https:\/\/doi.org\/10.1109\/SURV.2013.050113.00015","DOI":"10.1109\/SURV.2013.050113.00015"},{"key":"13_CR73","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511811654","volume-title":"Multiagent Systems: Algorithmic, Game-Theoretic, and Logical Foundations","author":"Y Shoham","year":"2008","unstructured":"Y. Shoham, K. Leyton-Brown, Multiagent Systems: Algorithmic, Game-Theoretic, and Logical Foundations (Cambridge University Press, Cambridge, 2008)"},{"key":"13_CR74","doi-asserted-by":"publisher","first-page":"613","DOI":"10.2307\/1913126","volume":"48","author":"AF Shorrocks","year":"1980","unstructured":"A.F. Shorrocks, The class of additively decomposable inequality measures. Econometrica 48, 613\u201325 (1980)","journal-title":"Econometrica"},{"key":"13_CR75","unstructured":"U. Siddique, P. Weng, M. Zimmer, Learning fair policies in multi-objective (deep) reinforcement learning with average and discounted rewards, in International Conference on Machine Learning. PMLR (2020), pp. 8905\u20138915"},{"key":"13_CR76","unstructured":"K. Son, D. Kim, W.J. Kang, D.E. Hostallero, Y. Yi, Qtran: learning to factorize with transformation for cooperative multi-agent reinforcement learning, in International Conference on Machine Learning. PMLR (2019), pp. 5887\u20135896"},{"key":"13_CR77","unstructured":"M.T. Spaan, Partially observable markov decision processes, in Adaptation, Learning, and Optimization, vol. 12. Springer Verlag (2012), pp. 387\u2013414. https:\/\/doi.org\/10.1007\/978-3-642-27645-3{textbackslash_}12. https:\/\/link.springer.com\/chapter\/10.1007\/978-3-642-27645-3_12"},{"key":"13_CR78","doi-asserted-by":"crossref","unstructured":"T. Speicher, H. Heidari, N. Grgic-Hlaca, K.P. Gummadi, A. Singla, A. Weller, M.B. Zafar, A unified approach to quantifying algorithmic unfairness: measuring individual &group unfairness via inequality indices, in Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (2018), pp. 2239\u20132248","DOI":"10.1145\/3219819.3220046"},{"key":"13_CR79","unstructured":"P. Sunehag, G. Lever, A. Gruslys, W.M. Czarnecki, V. Zambaldi, M. Jaderberg, M. Lanctot, N. Sonnerat, J.Z. Leibo, K. Tuyls, T. Graepel, Value-decomposition networks for cooperative multi-agent learning based on team reward, in Proceedings of the International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS, vol. 3 (2018), pp. 2085\u20132087. http:\/\/arxiv.org\/abs\/1706.05296"},{"key":"13_CR80","volume-title":"Reinforcement Learning: An Introduction","author":"RS Sutton","year":"2018","unstructured":"R.S. Sutton, A.G. Barto, Reinforcement Learning: An Introduction (A Bradford Book, Cambridge, 2018)"},{"key":"13_CR81","doi-asserted-by":"crossref","unstructured":"M. Tan, Multi-agent reinforcement learning: independent vs. cooperative agents, in Proceedings of the Tenth International Conference on Machine Learning (1993), pp. 330\u2013337","DOI":"10.1016\/B978-1-55860-307-3.50049-6"},{"key":"13_CR82","doi-asserted-by":"crossref","unstructured":"S. Verma, J. Rubin, Fairness definitions explained, in 2018 IEEE\/ACM International Workshop on Software Fairness (Fairware). IEEE (2018), pp. 1\u20137","DOI":"10.1145\/3194770.3194776"},{"key":"13_CR83","doi-asserted-by":"crossref","unstructured":"J. Wang, Y. Zhang, T.K. Kim, Y. Gu, Shapley q-value: a local reward approach to solve global reward games, in Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34 (2020), pp. 7285\u20137292","DOI":"10.1609\/aaai.v34i05.6220"},{"key":"13_CR84","unstructured":"C.J.C.H. Watkins, Learning from delayed rewards (Doctoral dissertation). University of Cambridge (1989)"},{"issue":"3\u20134","key":"13_CR85","first-page":"279","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"C.J. Watkins, P. Dayan, Q-learning. Mach. Learn. 8(3\u20134), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"13_CR86","unstructured":"P. Weng, Fairness in reinforcement learning, in 34th International Conference on Machine Learning, ICML 2017, vol. 4 (2019), pp. 2542\u20132557. http:\/\/arxiv.org\/abs\/1907.10323"},{"issue":"4","key":"13_CR87","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1016\/0165-4896(81)90018-4","volume":"1","author":"JA Weymark","year":"1981","unstructured":"J.A. Weymark, Generalized Gini inequality indices. Math. Soc. Sci. 1(4), 409\u2013430 (1981)","journal-title":"Math. Soc. Sci."},{"key":"13_CR88","doi-asserted-by":"crossref","unstructured":"R.J. Williams, Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3\u20134), 229\u2013256 (1992). https:\/\/doi.org\/10.1007\/bf00992696. https:\/\/link.springer.com\/article\/10.1007\/BF00992696","DOI":"10.1007\/BF00992696"},{"key":"13_CR89","unstructured":"E. Yang, D. Gu, Multiagent Reinforcement Learning for Multi-Robot Systems: A Survey. Undefined (2004)"},{"key":"13_CR90","doi-asserted-by":"crossref","unstructured":"L. Yliniemi, K. Tumer, Multi-objective multiagent credit assignment through difference rewards in reinforcement learning, in Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics), vol. 8886 (2014), pp. 407\u2013418. https:\/\/doi.org\/10.1007\/978-3-319-13563-2_35. https:\/\/link.springer.com\/chapter\/10.1007\/978-3-319-13563-2_35","DOI":"10.1007\/978-3-319-13563-2_35"},{"key":"13_CR91","unstructured":"M. Yuan, Q. Cao, M.o. Pun, Y. Chen, Fairness-Oriented Scheduling for Bursty Traffic in OFDMA Downlink Systems Using Multi-Agent Reinforcement Learning (2020). http:\/\/arxiv.org\/abs\/2012.15081"},{"key":"13_CR92","unstructured":"C. Zhang, J.A. Shah, Fairness in multi-agent sequential decision-making, in Advances in Neural Information Processing Systems (2014), pp. 2636\u20132644"},{"key":"13_CR93","doi-asserted-by":"crossref","unstructured":"K. Zhang, Z. Yang, H. Liu, T. Zhang, T. Basar, Fully decentralized multi-agent reinforcement learning with networked agents, in International Conference on Machine Learning. PMLR (2018), pp. 5872\u20135881","DOI":"10.1109\/CDC.2018.8619581"},{"key":"13_CR94","unstructured":"M. Zimmer, C. Glanois, U. Siddique, P. Weng, Learning fair policies in decentralized cooperative multi-agent reinforcement learning, in International Conference on Machine Learning (2021)"},{"issue":"4","key":"13_CR95","doi-asserted-by":"publisher","first-page":"1060","DOI":"10.1007\/s10618-017-0506-1","volume":"31","author":"I \u017dliobait\u0117","year":"2017","unstructured":"I. \u017dliobait\u0117, Measuring discrimination in algorithmic decision making. Data Min. Knowl. Discov. 31(4), 1060\u20131089 (2017)","journal-title":"Data Min. Knowl. Discov."},{"key":"13_CR96","unstructured":"H. Zou, T. Ren, D. Yan, H. Su, J. Zhu, Reward Shaping via Meta-Learning (2019). http:\/\/arxiv.org\/abs\/1901.09330"}],"container-title":["Machine Learning Perspectives of Agent-Based Models"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73354-3_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T01:50:46Z","timestamp":1757469046000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73354-3_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031733536","9783031733543"],"references-count":96,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73354-3_13","relation":{},"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"24 March 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}