{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,26]],"date-time":"2025-12-26T07:06:58Z","timestamp":1766732818507,"version":"3.40.3"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030647926"},{"type":"electronic","value":"9783030647933"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-64793-3_15","type":"book-chapter","created":{"date-parts":[[2020,12,21]],"date-time":"2020-12-21T20:09:37Z","timestamp":1608581377000},"page":"277-296","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Normalizing Flow Policies for Multi-agent Systems"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7491-3935","authenticated-orcid":false,"given":"Xiaobai","family":"Ma","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4742-9942","authenticated-orcid":false,"given":"Jayesh K.","family":"Gupta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7238-9663","authenticated-orcid":false,"given":"Mykel 
J.","family":"Kochenderfer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,12,22]]},"reference":[{"key":"15_CR1","unstructured":"Balduzzi, D., Tuyls, K., Perolat, J., Graepel, T.: Re-evaluating evaluation. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 3268\u20133279 (2018)"},{"key":"15_CR2","unstructured":"Bansal, T., Pachocki, J., Sidor, S., Sutskever, I., Mordatch, I.: Emergent complexity via multi-agent competition. In: International Conference on Learning Representations (ICLR) (2018)"},{"key":"15_CR3","doi-asserted-by":"crossref","unstructured":"Bhattacharyya, R.P., Phillips, D.J., Liu, C., Gupta, J.K., Driggs-Campbell, K., Kochenderfer, M.J.: Simulating emergent properties of human driving behavior using multi-agent reward augmented imitation learning. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 789\u2013795. IEEE (2019)","DOI":"10.1109\/ICRA.2019.8793750"},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Blum, A., Mansour, Y.: Learning, regret minimization, and equilibria. In: Nisan, N., Roughgarden, T., Tardos, E., Vazirani, V.V. (eds.) Algorithmic Game Theory, chap. 4, pp. 79\u2013102. Cambridge University Press (2007)","DOI":"10.1017\/CBO9780511800481.006"},{"key":"15_CR5","unstructured":"Brown, G.W.: Iterative solution of games by fictitious play. In: Activity Analysis of Production and Allocation, vol. 13, no. 1, pp. 374\u2013376 (1951)"},{"issue":"2","key":"15_CR6","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1109\/TSMCC.2007.913919","volume":"38","author":"L Busoniu","year":"2008","unstructured":"Busoniu, L., Babuska, R., De Schutter, B.: A comprehensive survey of multiagent reinforcement learning. IEEE Trans. Syst. Man Cybern. Part C (Appl. Rev.) 38(2), 156\u2013172 (2008)","journal-title":"IEEE Trans. Syst. Man Cybern. Part C (Appl. 
Rev.)"},{"key":"15_CR7","doi-asserted-by":"crossref","unstructured":"Cerm\u00e1k, J., Bo\u0161ansk\u00fd, B., Durkota, K., Lis\u00fd, V., Kiekintveld, C.: Using correlated strategies for computing Stackelberg equilibria in extensive-form games. In: AAAI Conference on Artificial Intelligence (2016)","DOI":"10.1609\/aaai.v30i1.10045"},{"key":"15_CR8","unstructured":"Dinh, L., Krueger, D., Bengio, Y.: NICE: non-linear independent components estimation. arXiv preprint arXiv:1410.8516 (2014)"},{"key":"15_CR9","unstructured":"Dinh, L., Sohl-Dickstein, J., Bengio, S.: Density estimation using Real NVP. arXiv preprint arXiv:1605.08803 (2016)"},{"key":"15_CR10","unstructured":"Duan, Y., Chen, X., Houthooft, R., Schulman, J., Abbeel, P.: Benchmarking deep reinforcement learning for continuous control. In: International Conference on Machine Learning (ICML), pp. 1329\u20131338 (2016)"},{"key":"15_CR11","unstructured":"Germain, M., Gregor, K., Murray, I., Larochelle, H.: MADE: masked autoencoder for distribution estimation. In: International Conference on Machine Learning (ICML), pp. 881\u2013889 (2015)"},{"key":"15_CR12","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 2672\u20132680 (2014)"},{"key":"15_CR13","unstructured":"Haarnoja, T., Tang, H., Abbeel, P., Levine, S.: Reinforcement learning with deep energy-based policies. In: International Conference on Machine Learning (ICML), pp. 1352\u20131361 (2017)"},{"key":"15_CR14","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. arXiv preprint arXiv:1801.01290 (2018)"},{"key":"15_CR15","doi-asserted-by":"crossref","unstructured":"Haskell, W.B., Kar, D., Fang, F., Tambe, M., Cheung, S., Denicola, E.: Robust protection of fisheries with COmPASS. 
In: AAAI Conference on Artificial Intelligence (2014)","DOI":"10.1609\/aaai.v28i2.19018"},{"key":"15_CR16","unstructured":"Heinrich, J., Silver, D.: Deep reinforcement learning from self-play in imperfect-information games. arXiv preprint arXiv:1603.01121 (2016)"},{"key":"15_CR17","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 4565\u20134573 (2016)"},{"key":"15_CR18","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/11691839_1","volume-title":"Learning and Adaption in Multi-Agent Systems","author":"PJ Hoen","year":"2006","unstructured":"Hoen, P.J., Tuyls, K., Panait, L., Luke, S., La Poutr\u00e9, J.A.: An overview of cooperative and competitive multiagent learning. In: Tuyls, K., Hoen, P.J., Verbeeck, K., Sen, S. (eds.) LAMAS 2005. LNCS (LNAI), vol. 3898, pp. 1\u201346. Springer, Heidelberg (2006). https:\/\/doi.org\/10.1007\/11691839_1"},{"key":"15_CR19","unstructured":"Johnson, M.P., Fang, F., Tambe, M.: Designing patrol strategies to maximize pristine forest area. In: AAAI Conference on Artificial Intelligence (2012)"},{"key":"15_CR20","doi-asserted-by":"crossref","unstructured":"Kamra, N., Fang, F., Kar, D., Liu, Y., Tambe, M.: Handling continuous space security games with neural networks. In: IWAISe: International Workshop on Artificial Intelligence in Security, p. 17 (2017)","DOI":"10.1609\/aaai.v32i1.11446"},{"key":"15_CR21","doi-asserted-by":"crossref","unstructured":"Kamra, N., Gupta, U., Fang, F., Liu, Y., Tambe, M.: Policy learning for continuous space security games using neural networks. 
In: AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11446"},{"key":"15_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"238","DOI":"10.1007\/978-3-030-32430-8_15","volume-title":"Decision and Game Theory for Security","author":"N Kamra","year":"2019","unstructured":"Kamra, N., Gupta, U., Wang, K., Fang, F., Liu, Y., Tambe, M.: DeepFP for finding nash equilibrium in continuous action spaces. In: Alpcan, T., Vorobeychik, Y., Baras, J.S., D\u00e1n, G. (eds.) GameSec 2019. LNCS, vol. 11836, pp. 238\u2013258. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-32430-8_15"},{"key":"15_CR23","unstructured":"Kiekintveld, C., Jain, M., Tsai, J., et al.: Computing optimal randomized resource allocations for massive security games. In: International Conference on Autonomous Agents and Multi-agent Systems, pp. 689\u2013696 (2009)"},{"key":"15_CR24","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: International Conference on Learning Representations (2015)"},{"key":"15_CR25","unstructured":"Kingma, D.P., Dhariwal, P.: Glow: generative flow with invertible 1$$\\times $$1 convolutions. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 10215\u201310224 (2018)"},{"key":"15_CR26","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational Bayes. In: International Conference on Learning Representations (ICLR) (2013)"},{"key":"15_CR27","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/10187.001.0001","volume-title":"Decision Making Under Uncertainty: Theory and Application","author":"MJ Kochenderfer","year":"2015","unstructured":"Kochenderfer, M.J.: Decision Making Under Uncertainty: Theory and Application. MIT Press, Cambridge (2015)"},{"key":"15_CR28","unstructured":"Lanctot, M., Zambaldi, V., Gruslys, A., et al.: A unified game-theoretic approach to multiagent reinforcement learning. 
In: Advances in Neural Information Processing Systems (NeurIPS), pp. 4190\u20134203 (2017)"},{"key":"15_CR29","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"15_CR30","doi-asserted-by":"crossref","unstructured":"Littman, M.L.: Markov games as a framework for multi-agent reinforcement learning. In: Machine Learning, pp. 157\u2013163. Elsevier (1994)","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"key":"15_CR31","unstructured":"Liu, S., Lever, G., Merel, J., Tunyasuvunakool, S., Heess, N., Graepel, T.: Emergent coordination through competition. In: International Conference on Learning Representations (ICLR) (2018)"},{"key":"15_CR32","volume-title":"Advanced Microeconomic Theory: An Intuitive Approach with Examples","author":"F Mu\u00f1oz-Garcia","year":"2017","unstructured":"Mu\u00f1oz-Garcia, F.: Advanced Microeconomic Theory: An Intuitive Approach with Examples. MIT Press, Cambridge (2017)"},{"issue":"1","key":"15_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41598-019-45619-9","volume":"9","author":"S Omidshafiei","year":"2019","unstructured":"Omidshafiei, S., et al.: $$\\alpha $$-rank: multi-agent evaluation by evolution. Sci. Rep. 9(1), 1\u201329 (2019)","journal-title":"Sci. Rep."},{"key":"15_CR34","unstructured":"Papamakarios, G., Nalisnick, E., Rezende, D.J., Mohamed, S., Lakshminarayanan, B.: Normalizing flows for probabilistic modeling and inference. arXiv preprint arXiv:1912.02762 (2019)"},{"issue":"5\u20136","key":"15_CR35","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1561\/2200000073","volume":"11","author":"G Peyr\u00e9","year":"2019","unstructured":"Peyr\u00e9, G., Cuturi, M., et al.: Computational optimal transport. Found. Trends\u00ae Mach. Learn. 11(5\u20136), 355\u2013607 (2019)","journal-title":"Found. Trends\u00ae Mach. 
Learn."},{"key":"15_CR36","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1162\/neco.1991.3.1.88","volume":"3","author":"D Pomerleau","year":"1991","unstructured":"Pomerleau, D.: Efficient training of artificial neural networks for autonomous navigation. Neural Comput. 3, 88\u201397 (1991)","journal-title":"Neural Comput."},{"key":"15_CR37","unstructured":"Rezende, D., Mohamed, S.: Variational inference with normalizing flows. In: International Conference on Machine Learning (ICML), pp. 1530\u20131538 (2015)"},{"key":"15_CR38","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"794","DOI":"10.1007\/978-3-030-01261-8_47","volume-title":"Computer Vision \u2013 ECCV 2018","author":"N Rhinehart","year":"2018","unstructured":"Rhinehart, N., Kitani, K.M., Vernaza, P.: r2p2: a ReparameteRized pushforward policy for diverse, precise generative path forecasting. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11217, pp. 794\u2013811. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_47"},{"key":"15_CR39","doi-asserted-by":"crossref","unstructured":"Rosenfeld, A., Kraus, S.: When security games hit traffic: optimal traffic enforcement under one sided uncertainty. In: International Joint Conferences on Artificial Intelligence (IJCAI), pp. 3814\u20133822 (2017)","DOI":"10.24963\/ijcai.2017\/533"},{"key":"15_CR40","doi-asserted-by":"crossref","unstructured":"Schmerling, E., Leung, K., Vollprecht, W., Pavone, M.: Multimodal probabilistic model-based planning for human-robot interaction. In: IEEE International Conference on Robotics and Automation (ICRA), pp. 1\u20139 (2017)","DOI":"10.1109\/ICRA.2018.8460766"},{"key":"15_CR41","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. 
arXiv preprint arXiv:1707.06347 (2017)"},{"issue":"10","key":"15_CR42","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.1073\/pnas.39.10.1095","volume":"39","author":"LS Shapley","year":"1953","unstructured":"Shapley, L.S.: Stochastic games. Proc. Natl. Acad. Sci. 39(10), 1095\u20131100 (1953). ISSN 0027\u20138424","journal-title":"Proc. Natl. Acad. Sci."},{"issue":"7587","key":"15_CR43","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484 (2016)","journal-title":"Nature"},{"key":"15_CR44","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: International Conference on Machine Learning (ICML), pp. 387\u2013395 (2014)"},{"issue":"7676","key":"15_CR45","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., Schrittwieser, J., Simonyan, K., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354 (2017)","journal-title":"Nature"},{"key":"15_CR46","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511973031","volume-title":"Security and Game Theory: Algorithms, Deployed Systems, Lessons Learned","author":"M Tambe","year":"2011","unstructured":"Tambe, M.: Security and Game Theory: Algorithms, Deployed Systems, Lessons Learned. Cambridge University Press, Cambridge (2011)"},{"key":"15_CR47","unstructured":"Tang, Y., Agrawal, S.: Implicit policy for reinforcement learning. 
arXiv preprint arXiv:1806.06798 (2018)"},{"issue":"1\u20132","key":"15_CR48","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1016\/0025-5564(78)90077-9","volume":"40","author":"PD Taylor","year":"1978","unstructured":"Taylor, P.D., Jonker, L.B.: Evolutionary stable strategies and game dynamics. Math. Biosci. 40(1\u20132), 145\u2013156 (1978)","journal-title":"Math. Biosci."},{"key":"15_CR49","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","volume":"575","author":"O Vinyals","year":"2019","unstructured":"Vinyals, O., et al.: Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575, 350\u2013354 (2019). ISSN 0028\u20130836","journal-title":"Nature"},{"key":"15_CR50","doi-asserted-by":"publisher","first-page":"1002","DOI":"10.1007\/s10489-018-1307-y","volume":"49","author":"B Wang","year":"2017","unstructured":"Wang, B., Zhang, Y., Zhou, Z.H., Zhong, S.: On repeated Stackelberg security game with the cooperative human behavior model for wildlife protection. Appl. Intell. 49, 1002\u20131015 (2017)","journal-title":"Appl. 
Intell."}],"container-title":["Lecture Notes in Computer Science","Decision and Game Theory for Security"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-64793-3_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,8]],"date-time":"2022-12-08T06:35:20Z","timestamp":1670481320000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-64793-3_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030647926","9783030647933"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-64793-3_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"22 December 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"GameSec","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Decision and Game Theory for Security","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"College Park, MD","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 October 2020","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"gamesec2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.gamesec-conf.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"29","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"72% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole 
number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}