{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T21:50:10Z","timestamp":1743112210451,"version":"3.40.3"},"publisher-location":"Cham","reference-count":52,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031727603"},{"type":"electronic","value":"9783031727610"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72761-0_2","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:01:50Z","timestamp":1727593310000},"page":"19-36","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Modelling Competitive Behaviors in\u00a0Autonomous Driving Under Generative World Model"],"prefix":"10.1007","author":[{"given":"Guanren","family":"Qiao","sequence":"first","affiliation":[]},{"given":"Guorui","family":"Quan","sequence":"additional","affiliation":[]},{"given":"Rongxiao","family":"Qu","sequence":"additional","affiliation":[]},{"given":"Guiliang","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"2_CR1","unstructured":"Bai, Y., Jin, C., Wang, H., Xiong, C.: Sample-efficient learning of stackelberg equilibria in general-sum games. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 25799\u201325811 (2021)"},{"key":"2_CR2","unstructured":"Bai, Y., Jin, C., Yu, T.: Near-optimal reinforcement learning with self-play. In: Advances in Neural Information Processing Systems (NeurIPS) (2020)"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Bergamini, L., et al.: Simnet: learning reactive self-driving simulations from real-world observations. In: IEEE International Conference on Robotics and Automation (ICRA), pp. 5119\u20135125 (2021)","DOI":"10.1109\/ICRA48506.2021.9561666"},{"key":"2_CR4","doi-asserted-by":"publisher","first-page":"124","DOI":"10.3141\/1852-16","volume":"1852","author":"E Brockfeld","year":"2003","unstructured":"Brockfeld, E., K\u00fchne, R.D., Skabardonis, A., Wagner, P.: Toward benchmarking of microscopic traffic flow models. Transp. Res. Rec. 1852, 124\u2013129 (2003)","journal-title":"Transp. Res. Rec."},{"key":"2_CR5","unstructured":"Cen, S., Chi, Y., Du, S.S., Xiao, L.: Faster last-iterate convergence of policy optimization in zero-sum Markov games. In: International Conference on Learning Representations (ICLR) (2023)"},{"key":"2_CR6","unstructured":"Chai, Y., Sapp, B., Bansal, M., Anguelov, D.: Multipath: multiple probabilistic anchor trajectory hypotheses for behavior prediction. In: Annual Conference on Robot Learning (CoRL), vol.\u00a0100, pp. 86\u201399 (2019)"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Chen, Y., Ivanovic, B., Pavone, M.: Scept: scene-consistent, policy-based trajectory predictions for planning. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 17082\u201317091 (2022)","DOI":"10.1109\/CVPR52688.2022.01659"},{"key":"2_CR8","unstructured":"Dosovitskiy, A., Ros, G., Codevilla, F., L\u00f3pez, A.M., Koltun, V.: Carla: an open urban driving simulator. In: Annual Conference on Robot Learning (CoRL) (2017)"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Feng, L., Li, Q., Peng, Z., Tan, S., Zhou, B.: Trafficgen: learning to generate diverse and realistic traffic scenarios. In: IEEE International Conference on Robotics and Automation (ICRA), pp. 3567\u20133575 (2023)","DOI":"10.1109\/ICRA48891.2023.10160296"},{"key":"2_CR10","unstructured":"Gulino, C., et al.: Waymax: an accelerated, data-driven simulator for large-scale autonomous driving research. CoRR abs\/2310.08710 (2023)"},{"key":"2_CR11","unstructured":"Haarnoja, T., Tang, H., Abbeel, P., Levine, S.: Reinforcement learning with deep energy-based policies. In: International Conference on Machine Learning (ICML), pp. 1352\u20131361 (2017)"},{"key":"2_CR12","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning (ICML), vol. 80, pp. 1856\u20131865 (2018)"},{"key":"2_CR13","unstructured":"Hambly, B.M., Xu, R., Yang, H.: Policy gradient methods find the nash equilibrium in n-player general-sum linear-quadratic games. J. Mach. Learn. Res. (JMLR) 24, 139:1\u2013139:56 (2023)"},{"key":"2_CR14","unstructured":"Hu, J., Wellman, M.P.: Nash Q-learning for general-sum stochastic games. J. Mach. Learn. Res. (JMLR) 4, 1039\u20131069 (2003)"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Huang, Z., Liu, H., Lv, C.: Gameformer: game-theoretic modeling and learning of transformer-based interactive prediction and planning for autonomous driving. In: International Conference on Computer Vision (ICCV), pp. 3880\u20133890 (2023)","DOI":"10.1109\/ICCV51070.2023.00361"},{"key":"2_CR16","doi-asserted-by":"crossref","unstructured":"Hwang, K.S., Chiou, J.Y., Chen, T.Y.: Cooperative reinforcement learning based on zero-sum games. In: SICE Annual Conference, pp. 2973\u20132976 (2008)","DOI":"10.1109\/SICE.2008.4655172"},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Jin, C., Liu, Q., Wang, Y., Yu, T.: V-learning-a simple, efficient, decentralized algorithm for multiagent RL. In: International Conference on Learning Representations (ICLR Workshop) (2022)","DOI":"10.1287\/moor.2021.0317"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Kar, A., et al.: Meta-sim: learning to generate synthetic datasets. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4550\u20134559 (2019)","DOI":"10.1109\/ICCV.2019.00465"},{"key":"2_CR19","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. In: International Conference on Learning Representations (ICLR) (2014)"},{"key":"2_CR20","unstructured":"Leonardos, S., Overman, W., Panageas, I., Piliouras, G.: Global convergence of multi-agent policy gradient in Markov potential games. In: International Conference on Learning Representations (ICLR) (2022)"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Liang, M., et al.: Learning lane graph representations for motion forecasting. In: European Conference on Computer Vision (ECCV), vol. 12347, pp. 541\u2013556 (2020)","DOI":"10.1007\/978-3-030-58536-5_32"},{"key":"2_CR22","unstructured":"Lioutas, V., Scibior, A., Wood, F.: Titrated: learned human driving behavior without infractions via amortized inference. Trans. Mach. Learn. Res. (TMLR) (2022)"},{"key":"2_CR23","unstructured":"Liu, M., Ozdaglar, A.E., Yu, T., Zhang, K.: The power of regularization in solving extensive-form games. In: International Conference on Learning Representations (ICLR) (2023)"},{"key":"2_CR24","unstructured":"Liu, S., Zhu, M.: Distributed inverse constrained reinforcement learning for multi-agent systems. In: Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"2_CR25","unstructured":"Liu, S., Zhu, M.: Learning multi-agent behaviors from distributed and streaming demonstrations. In: Neural Information Processing Systems (NeurIPS) (2023)"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Lopez, P.A., et al.: Microscopic traffic simulation using sumo. In: IEEE International Conference on Intelligent Transportation Systems (ITSC), pp. 2575\u20132582 (2018)","DOI":"10.1109\/ITSC.2018.8569938"},{"key":"2_CR27","unstructured":"Lowe, R., Wu, Y., Tamar, A., Harb, J., Abbeel, P., Mordatch, I.: Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 6379\u20136390 (2017)"},{"issue":"1","key":"2_CR28","first-page":"165","volume":"13","author":"W Mao","year":"2023","unstructured":"Mao, W., Basar, T.: Provably efficient reinforcement learning in decentralized general-sum Markov games. Dyn. Games Appl. 13(1), 165\u2013186 (2023)","journal-title":"Dyn. Games Appl."},{"issue":"7540","key":"2_CR29","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"2_CR30","doi-asserted-by":"crossref","unstructured":"Nash, J.: Non-cooperative games. Ann. Math. 286\u2013295 (1951)","DOI":"10.2307\/1969529"},{"key":"2_CR31","doi-asserted-by":"crossref","unstructured":"Salzmann, T., Ivanovic, B., Chakravarty, P., Pavone, M.: Trajectron++: dynamically-feasible trajectory forecasting with heterogeneous data. In: European Conference on Computer Vision (ECCV), vol. 12363, pp. 683\u2013700 (2020)","DOI":"10.1007\/978-3-030-58523-5_40"},{"key":"2_CR32","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M.I., Moritz, P.: Trust region policy optimization. In: International Conference on Machine Learning (ICML), vol.\u00a037, pp. 1889\u20131897 (2015)"},{"key":"2_CR33","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. CoRR abs\/1707.06347 (2017)"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"\u015acibior, A., Lioutas, V., Reda, D., Bateni, P., Wood, F.: Imagining the road ahead: multi-agent trajectory prediction via differentiable simulation. In: IEEE International Intelligent Transportation Systems Conference (ITSC), pp. 720\u2013725 (2021)","DOI":"10.1109\/ITSC48978.2021.9565113"},{"key":"2_CR35","unstructured":"Sokota, S., et al.: A unified approach to reinforcement learning, quantal response equilibria, and two-player zero-sum games. In: International Conference on Learning Representations (ICLR) (2023)"},{"key":"2_CR36","unstructured":"Song, Z., Mei, S., Bai, Y.: When can we learn general-sum Markov games with a large number of players sample-efficiently? In: International Conference on Learning Representations (ICLR) (2022)"},{"key":"2_CR37","doi-asserted-by":"crossref","unstructured":"Suo, S., Regalado, S., Casas, S., Urtasun, R.: Trafficsim: learning to simulate realistic multi-agent behaviors. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10400\u201310409 (2021)","DOI":"10.1109\/CVPR46437.2021.01026"},{"key":"2_CR38","doi-asserted-by":"crossref","unstructured":"Suo, S., et al.: Mixsim: a hierarchical framework for mixed reality traffic simulation. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9622\u20139631 (2023)","DOI":"10.1109\/CVPR52729.2023.00928"},{"key":"2_CR39","doi-asserted-by":"crossref","unstructured":"Tan, S., Wong, K., Wang, S., Manivasagam, S., Ren, M., Urtasun, R.: Scenegen: learning to generate realistic traffic scenes. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 892\u2013901 (2021)","DOI":"10.1109\/CVPR46437.2021.00095"},{"key":"2_CR40","unstructured":"Wilson, B., et al.: Argoverse 2: next generation datasets for self-driving perception and forecasting. In: Advances in Neural Information Processing Systems Track on Datasets and Benchmarks 1 (NeurIPS) (2021)"},{"key":"2_CR41","doi-asserted-by":"crossref","unstructured":"Xu, D., Chen, Y., Ivanovic, B., Pavone, M.: Bits: bi-level imitation for traffic simulation. In: IEEE International Conference on Robotics and Automation (ICRA), pp. 2929\u20132936. IEEE (2023)","DOI":"10.1109\/ICRA48891.2023.10161167"},{"issue":"1","key":"2_CR42","doi-asserted-by":"publisher","first-page":"2037","DOI":"10.1038\/s41467-023-37677-5","volume":"14","author":"X Yan","year":"2023","unstructured":"Yan, X., Zou, Z., Feng, S., Zhu, H., Sun, H., Liu, H.X.: Learning naturalistic driving environment with statistical realism. Nat. Commun. 14(1), 2037 (2023)","journal-title":"Nat. Commun."},{"key":"2_CR43","unstructured":"Yang, Y., Wang, J.: An overview of multi-agent reinforcement learning from game theoretical perspective. CoRR abs\/2011.00583 (2020)"},{"key":"2_CR44","unstructured":"Yu, C., et al.: The surprising effectiveness of PPO in cooperative multi-agent games. In: Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"2_CR45","doi-asserted-by":"publisher","first-page":"1041","DOI":"10.1080\/02664763.2023.2175799","volume":"51","author":"Z Yu","year":"2023","unstructured":"Yu, Z., Yang, J., Huang, H.H.: Smoothing regression and impact measures for accidents of traffic flows. J. Appl. Stat. 51, 1041\u20131056 (2023)","journal-title":"J. Appl. Stat."},{"key":"2_CR46","unstructured":"Zhang, C., Tu, J., Zhang, L., Wong, K., Suo, S., Urtasun, R.: Learning realistic traffic agents in closed-loop. In: Annual Conference on Robot Learning (CoRL) (2023)"},{"issue":"6","key":"2_CR47","doi-asserted-by":"publisher","first-page":"3586","DOI":"10.1137\/19M1288012","volume":"58","author":"K Zhang","year":"2020","unstructured":"Zhang, K., Koppel, A., Zhu, H., Basar, T.: Global convergence of policy gradient methods to (almost) locally optimal policies. SIAM J. Control. Optim. 58(6), 3586\u20133612 (2020)","journal-title":"SIAM J. Control. Optim."},{"key":"2_CR48","doi-asserted-by":"crossref","unstructured":"Zhang, K., Yang, Z., Ba\u015far, T.: Multi-agent reinforcement learning: a selective overview of theories and algorithms. In: Handbook of Reinforcement Learning and Control, pp. 321\u2013384 (2021)","DOI":"10.1007\/978-3-030-60990-0_12"},{"key":"2_CR49","unstructured":"Zhang, Y., Zhang, R., Gu, Y., Li, N.: Multi-agent reinforcement learning with reward delays. In: Learning for Dynamics and Control Conference (L4DC), vol.\u00a0211, pp. 692\u2013704 (2023)"},{"key":"2_CR50","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Wang, J., Li, Y., Huang, Y.: Query-centric trajectory prediction. In: International Conference on Computer Vision and Pattern Recognition (CVPR), pp. 17863\u201317873 (2023)","DOI":"10.1109\/CVPR52729.2023.01713"},{"key":"2_CR51","unstructured":"Zhou, Z., Wen, Z., Wang, J., Li, Y., Huang, Y.: Qcnext: a next-generation framework for joint multi-agent trajectory prediction. CoRR abs\/2306.10508 (2023)"},{"key":"2_CR52","unstructured":"Ziebart, B.D., Bagnell, J.A., Dey, A.K.: Modeling interaction via the principle of maximum causal entropy. In: International Conference on Machine Learning (ICML), pp. 1255\u20131262 (2010)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72761-0_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:26:12Z","timestamp":1727594772000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72761-0_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031727603","9783031727610"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72761-0_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}