{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T21:40:06Z","timestamp":1766439606618,"version":"3.48.0"},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T00:00:00Z","timestamp":1764115200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T00:00:00Z","timestamp":1764115200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s10994-025-06893-z","type":"journal-article","created":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T19:54:58Z","timestamp":1764186898000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Preference-based opponent shaping in differentiable games"],"prefix":"10.1007","volume":"114","author":[{"given":"Xinyu","family":"Qiao","sequence":"first","affiliation":[]},{"given":"Yudong","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Congying","family":"Han","sequence":"additional","affiliation":[]},{"given":"Weiyan","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Tiande","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,26]]},"reference":[{"key":"6893_CR1","unstructured":"Albrecht, S. V., & Stone, P. (2019). Reasoning about hypothetical agent behaviours and their parameters. arXiv preprint arXiv:1906.11064"},{"key":"6893_CR2","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/002200278002400101","volume":"24","author":"R Axelrod","year":"1980","unstructured":"Axelrod, R. (1980). Effective choice in the prisoner\u2019s dilemma. Journal of Conflict Resolution, 24, 3\u201325.","journal-title":"Journal of Conflict Resolution"},{"issue":"4489","key":"6893_CR3","doi-asserted-by":"publisher","first-page":"1390","DOI":"10.1126\/science.7466396","volume":"211","author":"RH Axelrod","year":"1981","unstructured":"Axelrod, R. H., & Hamilton, W. D. (1981). The evolution of cooperation. Science, 211(4489), 1390\u20131396.","journal-title":"Science"},{"key":"6893_CR4","unstructured":"Azizian, W., Mitliagkas, I., Lacoste-Julien, S., et\u00a0al. (2020). A tight and unified analysis of gradient-based methods for a whole spectrum of differentiable games. International Conference on Artificial Intelligence and Statistics. https:\/\/api.semanticscholar.org\/CorpusID:220089599"},{"key":"6893_CR5","unstructured":"Balduzzi, D., Racaniere, S., Martens, J., et\u00a0al. (2018). The mechanics of n-player differentiable games. International Conference on Machine Learning, pp. 354\u2013363."},{"key":"6893_CR6","doi-asserted-by":"crossref","unstructured":"Budinich, M., & Fortnow, L. (2011). Repeated matching pennies with limited randomness. arXiv:1102.1096. https:\/\/api.semanticscholar.org\/CorpusID:8398158","DOI":"10.1145\/1993574.1993592"},{"key":"6893_CR7","doi-asserted-by":"crossref","unstructured":"Bu\u015foniu, L., Babu\u0161ka, R., & De\u00a0Schutter, B. (2010). Multi-agent reinforcement learning: An overview. Innovations in multi-agent systems and applications-1, pp. 183\u2013221.","DOI":"10.1007\/978-3-642-14435-6_7"},{"key":"6893_CR8","unstructured":"Curry, M., Trott, A., Phade, S., et\u00a0al. (2023). Learning solutions in large economic networks using deep multi-agent reinforcement learning. In: AAMAS, pp. 2760\u20132762."},{"key":"6893_CR9","doi-asserted-by":"crossref","unstructured":"Falk, A., Fehr, E., & Fischbacher, U. (1999). On the nature of fair behavior. Behavioral & Experimental Economics. https:\/\/api.semanticscholar.org\/CorpusID:13924694","DOI":"10.2139\/ssrn.203289"},{"key":"6893_CR10","unstructured":"Foerster, J. N., Chen, R. Y., Al-Shedivat, M., et\u00a0al. (2017). Learning with opponent-learning awareness. arXiv preprint arXiv:1709.04326"},{"key":"6893_CR11","unstructured":"Fung, K., Zhang, Q., Lu, C., et\u00a0al. (2024). Analysing the sample complexity of opponent shaping. arXiv preprint arXiv:2402.05782"},{"key":"6893_CR12","unstructured":"Gemp, I., McKee, K. R., Everett, R., et\u00a0al. (2022). D3c: Reducing the price of anarchy in multi-agent learning. arxiv:2010.00575"},{"key":"6893_CR13","doi-asserted-by":"crossref","unstructured":"Gupta, J. K., Egorov, M., & Kochenderfer, M. (2017). Cooperative multi-agent control using deep reinforcement learning. In Autonomous agents and multiagent systems: AAMAS 2017 workshops, Best Papers, S\u00e3o Paulo, Brazil, May 8\u201312, 2017, Revised Selected Papers 16, Springer, pp. 66\u201383.","DOI":"10.1007\/978-3-319-71682-4_5"},{"issue":"4","key":"6893_CR14","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1016\/0167-2681(82)90011-7","volume":"3","author":"W G\u00fcth","year":"1982","unstructured":"G\u00fcth, W., Schmittberger, R., & Schwarze, B. (1982). An experimental analysis of ultimatum bargaining. Journal of Economic Behavior & Organization, 3(4), 367\u2013388.","journal-title":"Journal of Economic Behavior & Organization"},{"key":"6893_CR15","doi-asserted-by":"crossref","unstructured":"Harper, M., Knight, V. A., Jones, M., et\u00a0al. (2017). Reinforcement learning produces dominant strategies for the iterated prisoner\u2019s dilemma. PLoS ONE, 12. https:\/\/api.semanticscholar.org\/CorpusID:9800255","DOI":"10.1371\/journal.pone.0188046"},{"key":"6893_CR16","unstructured":"He, H., Boyd-Graber, J., Kwok, K., et\u00a0al. (2016). Opponent modeling in deep reinforcement learning. In International conference on machine learning, PMLR, pp. 1804\u20131813."},{"key":"6893_CR17","unstructured":"Hostallero, D. E., Kim, D., Moon, S., et\u00a0al. (2020). Inducing cooperation through reward reshaping based on peer evaluations in deep multi-agent reinforcement learning. In Proceedings of the 19th international conference on autonomous agents and MultiAgent systems, pp. 520\u2013528."},{"issue":"Nov","key":"6893_CR18","first-page":"1039","volume":"4","author":"J Hu","year":"2003","unstructured":"Hu, J., & Wellman, M. P. (2003). Nash q-learning for general-sum stochastic games. Journal of Machine Learning Research, 4(Nov), 1039\u20131069.","journal-title":"Journal of Machine Learning Research"},{"issue":"13","key":"6893_CR19","doi-asserted-by":"publisher","first-page":"17194","DOI":"10.1007\/s10489-022-04249-x","volume":"53","author":"Y Hu","year":"2023","unstructured":"Hu, Y., Han, C., Li, H., et al. (2023). Modeling opponent learning in multiagent repeated games. Applied Intelligence, 53(13), 17194\u201317210.","journal-title":"Applied Intelligence"},{"key":"6893_CR20","unstructured":"Kim, D. K., Liu, M., Riemer, M. D., et\u00a0al. (2021). A policy gradient algorithm for learning to learn in multiagent reinforcement learning. In International conference on machine learning, PMLR, pp. 5541\u20135550."},{"key":"6893_CR21","unstructured":"Kwon, M., Agapiou, J. P., Du\u00e9\u00f1ez-Guzm\u00e1n, E. A., et\u00a0al. (2023). Auto-aligning multiagent incentives with global objectives. In ICML workshop on localized learning (LLW), https:\/\/openreview.net\/forum?id=U5gXo9zqNt"},{"key":"6893_CR22","unstructured":"Lee, K. T., & Louis, K. A. (1967). The application of decision theory and dynamic programming to adaptive control systems. https:\/\/api.semanticscholar.org\/CorpusID:62278693"},{"key":"6893_CR23","unstructured":"Letcher, A., Foerster, J., Balduzzi, D., et\u00a0al. (2018). Stable opponent shaping in differentiable games. arXiv preprint arXiv:1811.08469"},{"key":"6893_CR24","unstructured":"Lu, C., Willi, T., De\u00a0Witt, C. A. S., et\u00a0al. (2022). Model-free opponent shaping. International Conference on Machine Learning, PMLR, pp. 14398\u201314411."},{"key":"6893_CR25","unstructured":"Lupu, A., & Precup, D. (2020). Gifting in multi-agent reinforcement learning. In Proceedings of the 19th international conference on autonomous agents and multiagent systems, pp. 789\u2013797."},{"key":"6893_CR26","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1038\/292291a0","volume":"292","author":"RM May","year":"1981","unstructured":"May, R. M. (1981). The evolution of cooperation. Nature, 292, 291\u2013292.","journal-title":"Nature"},{"key":"6893_CR27","unstructured":"McKee, K. R., Gemp, I., McWilliams, B., et\u00a0al. (2020). Social diversity and social preferences in mixed-motive reinforcement learning. arXiv preprint arXiv:2002.02325"},{"issue":"1","key":"6893_CR28","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1109\/TCIAIG.2015.2491611","volume":"9","author":"R Mealing","year":"2015","unstructured":"Mealing, R., & Shapiro, J. L. (2015). Opponent modeling by expectation-maximization and sequence prediction in simplified poker. IEEE Transactions on Computational Intelligence and AI in Games, 9(1), 11\u201324.","journal-title":"IEEE Transactions on Computational Intelligence and AI in Games"},{"key":"6893_CR29","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1023\/B:EXEC.0000026978.14316.74","volume":"7","author":"H Oosterbeek","year":"2004","unstructured":"Oosterbeek, H., Sloof, R., & Van De Kuilen, G. (2004). Cultural differences in ultimatum game experiments: Evidence from a meta-analysis. Experimental Economics, 7, 171\u2013188.","journal-title":"Experimental Economics"},{"key":"6893_CR30","unstructured":"Radke, D., Larson, K., & Brecht, T. (2022). The importance of credo in multiagent learning. arXiv preprint arXiv:2204.07471"},{"key":"6893_CR31","unstructured":"Ro\u00fcsseau, J. J. (1984). A discourse on inequality. Penguin."},{"key":"6893_CR32","doi-asserted-by":"crossref","unstructured":"Sandoval, E. B., Brandstetter, J., Obaid, M., et al. (2015). Reciprocity in human-robot interaction: a quantitative approach through the prisoner\u2019s dilemma and the ultimatum game. International Journal of Social Robotics, 8, 303\u2013317. https:\/\/api.semanticscholar.org\/CorpusID:11008719","DOI":"10.1007\/s12369-015-0323-x"},{"issue":"5626","key":"6893_CR33","doi-asserted-by":"publisher","first-page":"1755","DOI":"10.1126\/science.1082976","volume":"300","author":"AG Sanfey","year":"2003","unstructured":"Sanfey, A. G., Rilling, J. K., Aronson, J. A., et al. (2003). The neural basis of economic decision-making in the ultimatum game. Science, 300(5626), 1755\u20131758.","journal-title":"Science"},{"key":"6893_CR34","unstructured":"Sch\u00e4fer, F., & Anandkumar, A. (2019). Competitive gradient descent. Advances in Neural Information Processing Systems, 32."},{"key":"6893_CR35","doi-asserted-by":"crossref","unstructured":"Smith, E. A. (2006). Foundations of human sociality: Economic experiments and ethnographic evidence from fifteen small-scale societies.","DOI":"10.1525\/aa.2006.108.2.420"},{"key":"6893_CR36","doi-asserted-by":"crossref","unstructured":"Synnaeve, G., & Bessiere, P. (2011). A bayesian model for opening prediction in rts games with application to starcraft. In 2011 IEEE conference on computational intelligence and games (CIG\u201911), IEEE, pp. 281\u2013288.","DOI":"10.1109\/CIG.2011.6032018"},{"key":"6893_CR37","unstructured":"Tang, Z., Yu, C., Chen, B., et\u00a0al. (2021). Discovering diverse multi-agent strategic behavior via reward randomization. arXiv preprint arXiv:2103.04564"},{"issue":"1","key":"6893_CR38","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1006\/game.1998.0687","volume":"28","author":"E Van Damme","year":"1999","unstructured":"Van Damme, E., & Hurkens, S. (1999). Endogenous stackelberg leadership. Games and Economic Behavior, 28(1), 105\u2013129. https:\/\/doi.org\/10.1006\/game.1998.0687","journal-title":"Games and Economic Behavior"},{"key":"6893_CR39","doi-asserted-by":"crossref","unstructured":"Weber, B. G., & Mateas, M. (2009). A data mining approach to strategy prediction. In 2009 IEEE symposium on computational intelligence and games, IEEE, pp. 140\u2013147.","DOI":"10.1109\/CIG.2009.5286483"},{"key":"6893_CR40","first-page":"16509","volume":"35","author":"M Wen","year":"2022","unstructured":"Wen, M., Kuba, J., Lin, R., et al. (2022). Multi-agent reinforcement learning is a sequence modeling problem. Advances in Neural Information Processing Systems, 35, 16509\u201316521.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6893_CR41","unstructured":"Wen, Y., Yang, Y., Luo, R., et\u00a0al. (2019). Probabilistic recursive reasoning for multi-agent reinforcement learning. arXiv preprint arXiv:1901.09207"},{"key":"6893_CR42","unstructured":"Willi, T., Letcher, A. H., Treutlein, J., et\u00a0al. (2022). Cola: consistent learning with opponent-learning awareness. International Conference on Machine Learning, PMLR, pp. 23804\u201323831."},{"key":"6893_CR43","doi-asserted-by":"crossref","unstructured":"Willis, R., Du, Y., Leibo, J. Z., et\u00a0al. (2024). Resolving social dilemmas with minimal reward transfer. arxiv:2310.12928","DOI":"10.1007\/s10458-024-09675-4"},{"key":"6893_CR44","unstructured":"Yang, J., Li, A., Farajtabar, M., et\u00a0al. (2020). Learning to incentivize other learning agents. arxiv:2006.06051"},{"key":"6893_CR45","unstructured":"Yang, Y., & Wang, J. (2020). An overview of multi-agent reinforcement learning from game theoretical perspective. arXiv preprint arXiv:2011.00583"},{"key":"6893_CR46","first-page":"24611","volume":"35","author":"C Yu","year":"2022","unstructured":"Yu, C., Velu, A., Vinitsky, E., et al. (2022). The surprising effectiveness of ppo in cooperative multi-agent games. Advances in Neural Information Processing Systems, 35, 24611\u201324624.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6893_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, C., & Lesser, V. (2010). Multi-agent learning with policy prediction. In Proceedings of the AAAI conference on artificial intelligence, pp. 927\u2013934.","DOI":"10.1609\/aaai.v24i1.7639"},{"key":"6893_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, H., Chen, W., Huang, Z., et\u00a0al. (2020) Bi-level actor-critic for multi-agent coordination. In Proceedings of the AAAI conference on artificial intelligence, pp. 7325\u20137332.","DOI":"10.1609\/aaai.v34i05.6226"},{"key":"6893_CR49","first-page":"1166","volume":"33","author":"K Zhang","year":"2020","unstructured":"Zhang, K., Kakade, S., Basar, T., et al. (2020). Model-based multi-agent rl in zero-sum markov games with near-optimal sample complexity. Advances in Neural Information Processing Systems, 33, 1166\u20131178.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6893_CR50","doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., & Ba\u015far, T. (2021). Multi-agent reinforcement learning: A selective overview of theories and algorithms. Handbook of reinforcement learning and control pp. 321\u2013384.","DOI":"10.1007\/978-3-030-60990-0_12"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-025-06893-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-025-06893-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-025-06893-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,22]],"date-time":"2025-12-22T21:30:02Z","timestamp":1766439002000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-025-06893-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,26]]},"references-count":50,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["6893"],"URL":"https:\/\/doi.org\/10.1007\/s10994-025-06893-z","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2025,11,26]]},"assertion":[{"value":"13 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 July 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 September 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"292"}}