{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T12:19:43Z","timestamp":1766578783852,"version":"3.40.3"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031200953"},{"type":"electronic","value":"9783031200960"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-20096-0_48","type":"book-chapter","created":{"date-parts":[[2023,1,12]],"date-time":"2023-01-12T15:04:11Z","timestamp":1673535851000},"page":"634-648","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["An Overview of Opponent Modeling for Multi-agent Competition"],"prefix":"10.1007","author":[{"given":"Lu","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaoyuan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingci","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuxi","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,1,13]]},"reference":[{"key":"48_CR1","doi-asserted-by":"crossref","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature\u00a0\u00a0521(7553), 436\u2013444 (2015)","DOI":"10.1038\/nature14539"},{"key":"48_CR2","unstructured":"Li, Y.: Deep reinforcement learning: An overview. arXiv preprint arXiv:1701.07274(2017)"},{"issue":"2","key":"48_CR3","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1109\/JAS.2016.7471613","volume":"3","author":"FY Wang","year":"2016","unstructured":"Wang, F.Y., Zhang, J.J., Zheng, X., et al.: Where does alphago go: From church-turing thesis to alphago thesis and beyond. IEEE\/CAA J. Autom. Sinica 3(2), 113\u2013120 (2016)","journal-title":"IEEE\/CAA J. Autom. Sinica"},{"key":"48_CR4","doi-asserted-by":"crossref","unstructured":"Holcomb, S.D., Porter, W.K., Ault, S.V., et al.: Overview on deepmind and its alphago zero ai. In: Proceedings of the 2018\u00a0International Conference on Big Data and Education, pp. 67\u201371\u00a0 (2018)","DOI":"10.1145\/3206157.3206174"},{"issue":"3","key":"48_CR5","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1109\/JAS.2016.7508798","volume":"3","author":"L Li","year":"2016","unstructured":"Li, L., Lv, Y., Wang, F.Y.: Traffic signal timing via deep reinforcement learning[J]. IEEE\/CAA J. Autom. Sinica 3(3), 247\u2013254 (2016)","journal-title":"IEEE\/CAA J. Autom. Sinica"},{"key":"48_CR6","doi-asserted-by":"crossref","unstructured":"Li, S., Chi, H., Xie, T.: Multi-agent combation-stationary environments. In:\u00a02021InternationalJoint Conference on Neural Networks (IJCNN). Shenzhen, China: IEEE, pp. 1\u20138 (2021)","DOI":"10.1109\/IJCNN52387.2021.9534036"},{"key":"48_CR7","doi-asserted-by":"crossref","unstructured":"Da Silva B.C., Basso, E.W., Bazzan, A.L., et al.: Dealing with non-stationary environments using context detection. In: Proceedings of the 23rd International Conference on Machine learning, pp. 217\u2013224\u00a0 (2006)","DOI":"10.1145\/1143844.1143872"},{"key":"48_CR8","first-page":"506","volume":"2","author":"M Weinberg","year":"2004","unstructured":"Weinberg, M., Rosenschein, J.S.: Best-response multiagent learning in non-stationary environments\u00a0 Proceedings of the Third International Joint Conference on Autonomous Agents and Multiagent Systems-Volume 2, 506\u2013513 (2004)","journal-title":"Proceedings of the Third International Joint Conference on Autonomous Agents and Multiagent Systems-Volume"},{"key":"48_CR9","unstructured":"Lowe, R., Wu, Y.I., Tamar, A., et al.: Multi-agent actor-critic for mixed cooperative-competitive envi- ronments. In: Advances in Neural Information Processing systems, vol.\u00a030 (2017)"},{"key":"48_CR10","unstructured":"Everett, R., Roberts, S.: Learning against non-stationary agents with opponent modelling and deep reinforcement learning. In:\u00a02018 AAAI spring symposium series, (2018)"},{"issue":"2","key":"48_CR11","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1523\/JNEUROSCI.4421-06.2007","volume":"27","author":"MX Cohen","year":"2007","unstructured":"Cohen, M.X., Ranganath, C.: Reinforcement learning signals predict future decisions. J. Neurosci. 27(2), 371\u2013378 (2007)","journal-title":"J. Neurosci."},{"issue":"3","key":"48_CR12","first-page":"53","volume":"33","author":"W Yeoh","year":"2012","unstructured":"Yeoh, W., Yokoo, M.: Distributed problem solving. AI Mag. 33(3), 53 (2012)","journal-title":"AI Mag."},{"key":"48_CR13","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1016\/S0927-0507(05)80172-0","volume":"2","author":"ML Puterman","year":"1990","unstructured":"Puterman, M.L.: Markov decision processes. Handbooks Oper. Res. Management Sci. 2, 331\u2013434 (1990)","journal-title":"Handbooks Oper. Res. Management Sci."},{"key":"48_CR14","unstructured":"Kumar, A., Zilberstein, S.: Event-detecting multi-agent mdps: Complexity and constant-factor approximation\u00a0(2009)"},{"issue":"11","key":"48_CR15","doi-asserted-by":"publisher","first-page":"1757","DOI":"10.1016\/j.artint.2011.05.001","volume":"175","author":"FS Melo","year":"2011","unstructured":"Melo, F.S., Veloso, M.: Decentralized mdps with sparse interactions. Artif. Intell. 175(11), 1757\u20131789 (2011)","journal-title":"Artif. Intell."},{"key":"48_CR16","unstructured":"Gmytrasiewicz, P.J., Doshi, P.: Interactive pomdps: Properties and preliminary results. In: International Conference on Autonomous Agents: Proceedings of the Third International Joint Conference on Autonomous Agents and Multiagent Systems, vol.\u00a03, pp. 1374\u20131375 (2004)"},{"key":"48_CR17","unstructured":"Wen, Y., Yang, Y., Luo, R., et al.: Probabilistic recursive reasoning for multi-agent reinforcement learning. arXiv preprint arXiv:1901.09207. (2019)"},{"issue":"2","key":"48_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2579821","volume":"15","author":"K Chatterjee","year":"2014","unstructured":"Chatterjee, K., Doyen, L.: Partial-observation stochastic games: How to win when belief fails. ACM Transactions on Computational Logic (TOCL) 15(2), 1\u201344 (2014)","journal-title":"ACM Transactions on Computational Logic (TOCL)"},{"issue":"10","key":"48_CR19","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","volume":"39","author":"LS Shapley","year":"1953","unstructured":"Shapley, L.S.: Stochastic games. Proc. Natl. Acad. Sci. 39(10), 1095\u20131100 (1953)","journal-title":"Proc. Natl. Acad. Sci."},{"key":"48_CR20","doi-asserted-by":"crossref","unstructured":"Littman, M.L.: Markov games as a framework for multi-agent reinforcement learning. Machine learning proceedings,: New Brunswick. NJ, USA: Elsevier pp. 157\u2013163 (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"issue":"23","key":"48_CR21","doi-asserted-by":"publisher","first-page":"13421","DOI":"10.1073\/pnas.93.23.13421","volume":"93","author":"KA McCabe","year":"1996","unstructured":"McCabe, K.A., Rassenti, S.J., Smith, V.L.: Game theory and reciprocity in some extensive form experimental games. Proc. Natl. Acad. Sci. 93(23), 13421\u201313428 (1996)","journal-title":"Proc. Natl. Acad. Sci."},{"key":"48_CR22","unstructured":"Hinrichs, T.R., Forbus, K.D.: Analogical learning in a turn-based strategy game. IJCAI, pp.\u00a0 853\u2013858\u00a0 2007"},{"key":"48_CR23","doi-asserted-by":"crossref","unstructured":"Kova\u0159\u00edk, V., Schmid, M., Burch, N., et al.: Rethinking formal models of partially observable multiagent decision making. Artif. Intell. 303, 103645 (2022)","DOI":"10.1016\/j.artint.2021.103645"},{"issue":"17","key":"48_CR24","doi-asserted-by":"publisher","first-page":"R644","DOI":"10.1016\/j.cub.2005.08.041","volume":"15","author":"C Frith","year":"2005","unstructured":"Frith, C., Frith, U.: Theory of mind. Curr. Biol. 15(17), R644\u2013R645 (2005)","journal-title":"Curr. Biol."},{"key":"48_CR25","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling, L.P., Littman, M.L., Moore, A.W.: Reinforcement learning: a survey. J.Artif. Intell. Res. 4, 237\u2013285 (1996)","journal-title":"J.Artif. Intell. Res."},{"issue":"3","key":"48_CR26","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins, C.J., Dayan, P.: Q-learning. Mach. Learn. 8(3), 279\u2013292 (1992)","journal-title":"Mach. Learn."},{"key":"48_CR27","doi-asserted-by":"crossref","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., et al.: Human-level control through deep reinforcement learning. Nature\u00a0518(7540), 529\u2013533\u00a02015","DOI":"10.1038\/nature14236"},{"key":"48_CR28","unstructured":"Lillicrap, T.P., Hunt, J.J., Pritzel, A., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971. (2015)"},{"key":"48_CR29","unstructured":"Hong, Z.W., Su, S.Y., Shann, T.Y., et al.: A deep policy inference q-network for multi-agent systems. arXiv preprint arXiv:1712.07893. (2017)"},{"key":"48_CR30","unstructured":"Raileanu, R., Denton, E., Szlam, A., et al.: Modeling others using oneself in multi-agent reinforcement learning. In: International Conference on Machine Learning. Stockholm, Sweden: PMLR, pp. 4257\u20134266 (2018)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning for Cyber Security"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20096-0_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,12]],"date-time":"2023-01-12T15:15:21Z","timestamp":1673536521000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20096-0_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031200953","9783031200960"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20096-0_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"13 January 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ML4CS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Machine Learning for Cyber Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 December 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ml4cs2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/nsclab.org\/ml4cs2022\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}