{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T12:40:08Z","timestamp":1753274408485,"version":"3.40.3"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031147135"},{"type":"electronic","value":"9783031147142"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-14714-2_27","type":"book-chapter","created":{"date-parts":[[2022,8,13]],"date-time":"2022-08-13T21:03:13Z","timestamp":1660424593000},"page":"385-399","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Generalization and\u00a0Computation for\u00a0Policy Classes of\u00a0Generative Adversarial Imitation Learning"],"prefix":"10.1007","author":[{"given":"Yirui","family":"Zhou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yangchun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaowei","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wanying","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengping","family":"Che","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyuan","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jian","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yaxin","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Ng, A.Y.: Apprenticeship learning via inverse reinforcement learning. In: International Conference on Machine Learning, pp. 1\u20138 (2004)","DOI":"10.1145\/1015330.1015430"},{"key":"27_CR2","unstructured":"Arora, S., Du, S.S., Hu, W., Li, Z., Salakhutdinov, R.R., Wang, R.: On exact computation with an infinitely wide neural net. In: Advances in Neural Information Processing Systems, vol. 32, pp. 8139\u20138148 (2019)"},{"key":"27_CR3","unstructured":"Arora, S., Ge, R., Liang, Y., Ma, T., Zhang, Y.: Generalization and equilibrium in generative adversarial nets (GANs). In: International Conference on Machine Learning, pp. 224\u2013232 (2017)"},{"issue":"1","key":"27_CR4","first-page":"714","volume":"18","author":"F Bach","year":"2017","unstructured":"Bach, F.: On the equivalence between kernel quadrature rules and random feature expansions. J. Mach. Learn. Res. 18(1), 714\u2013751 (2017)","journal-title":"J. Mach. Learn. Res."},{"key":"27_CR5","first-page":"103","volume":"15","author":"M Bain","year":"1995","unstructured":"Bain, M., Sammut, C.: A framework for behavioural cloning. Mach. Intell. 15, 103\u2013129 (1995)","journal-title":"Mach. Intell."},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Bhattacharyya, R.P., Phillips, D.J., Wulfe, B., Morton, J., Kuefler, A., Kochenderfer, M.J.: Multi-agent imitation learning for driving simulation. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 1534\u20131539. IEEE (2018)","DOI":"10.1109\/IROS.2018.8593758"},{"key":"27_CR7","unstructured":"Bietti, A., Mairal, J.: On the inductive bias of neural tangent kernels. In: Advances in Neural Information Processing Systems, vol. 32, pp. 12873\u201312884 (2019)"},{"key":"27_CR8","unstructured":"Chen, M., et al.: On computation and generalization of generative adversarial imitation learning. In: International Conference on Learning Representations (2020)"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Chi, W., et al.: Collaborative robot-assisted endovascular catheterization with generative adversarial imitation learning. In: 2020 IEEE International Conference on Robotics and Automation (ICRA), pp. 2414\u20132420 (2020)","DOI":"10.1109\/ICRA40945.2020.9196912"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Dally, K., Van Kampen, E.J.: Soft actor-critic deep reinforcement learning for fault tolerant flight control. In: AIAA SCITECH 2022 Forum, pp. 2078\u20132097 (2022)","DOI":"10.2514\/6.2022-2078"},{"key":"27_CR11","unstructured":"Fu, J., Luo, K., Levine, S.: Learning robust rewards with adversarial inverse reinforcement learning. arXiv preprint arXiv:1710.11248 (2017)"},{"key":"27_CR12","unstructured":"Guan, Z., Xu, T., Liang, Y.: When will generative adversarial imitation learning algorithms attain global convergence? In: International Conference on Artificial Intelligence and Statistics, pp. 1117\u20131125 (2021)"},{"key":"27_CR13","unstructured":"Haarnoja, T., Tang, H., Abbeel, P., Levine, S.: Reinforcement learning with deep energy-based policies. In: International Conference on Machine Learning, pp. 1352\u20131361 (2017)"},{"key":"27_CR14","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870 (2018)"},{"key":"27_CR15","unstructured":"Haarnoja, T., et al.: Soft actor-critic algorithms and applications. arXiv preprint arXiv:1812.05905 (2018)"},{"key":"27_CR16","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: Advances in Neural Information Processing Systems, vol. 29, pp. 4565\u20134573 (2016)"},{"key":"27_CR17","doi-asserted-by":"crossref","unstructured":"Jabri, M.K.: Robot manipulation learning using generative adversarial imitation learning. In: Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, IJCAI-21, pp. 4893\u20134894 (2021)","DOI":"10.24963\/ijcai.2021\/678"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Kim, K.E., Park, H.S.: Imitation learning via kernel mean embedding. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 3415\u20133422 (2018)","DOI":"10.1609\/aaai.v32i1.11720"},{"key":"27_CR19","unstructured":"Li, S., Xiao, S., Zhu, S., Du, N., Xie, Y., Song, L.: Learning temporal point processes via reinforcement learning. arXiv preprint arXiv:1811.05016 (2018)"},{"issue":"3\u20134","key":"27_CR20","first-page":"293","volume":"8","author":"LJ Lin","year":"1992","unstructured":"Lin, L.J.: Self-improving reactive agents based on reinforcement learning, planning and teaching. Mach. Learn. 8(3\u20134), 293\u2013321 (1992)","journal-title":"Mach. Learn."},{"key":"27_CR21","unstructured":"Mohri, M., Rostamizadeh, A., Talwalkar, A.: Foundations of Machine Learning. MIT Press, Cambridge (2018)"},{"issue":"2","key":"27_CR22","doi-asserted-by":"publisher","first-page":"429","DOI":"10.2307\/1428011","volume":"29","author":"A M\u00fcller","year":"1997","unstructured":"M\u00fcller, A.: Integral probability metrics and their generating classes of functions. Adv. Appl. Probab. 29(2), 429\u2013443 (1997)","journal-title":"Adv. Appl. Probab."},{"key":"27_CR23","unstructured":"Ng, A.Y., Russell, S.J., et al.: Algorithms for inverse reinforcement learning. In: International Conference on Machine Learning, pp. 663\u2013670 (2000)"},{"key":"27_CR24","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. John Wiley & Sons, Hoboken (2014)"},{"key":"27_CR25","unstructured":"Rahimi, A., Recht, B.: Random features for large-scale kernel machines. In: Advances in Neural Information Processing Systems, vol. 20, pp. 1177\u20131184 (2007)"},{"key":"27_CR26","unstructured":"Ross, S., Bagnell, D.: Efficient reductions for imitation learning. In: International Conference on Artificial Intelligence and Statistics, pp. 661\u2013668 (2010)"},{"key":"27_CR27","unstructured":"Ross, S., Gordon, G., Bagnell, D.: A reduction of imitation learning and structured prediction to no-regret online learning. In: International Conference on Artificial Intelligence and Statistics, pp. 627\u2013635 (2011)"},{"key":"27_CR28","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Shalev-Shwartz, S., Ben-David, S.: Understanding Machine Learning: From Theory to Algorithms. Cambridge University Press, Cambridge (2014)","DOI":"10.1017\/CBO9781107298019"},{"key":"27_CR30","unstructured":"Shani, L., Zahavy, T., Mannor, S.: Online apprenticeship learning. arXiv preprint arXiv:2102.06924 (2021)"},{"key":"27_CR31","doi-asserted-by":"crossref","unstructured":"Shi, J.C., Yu, Y., Da, Q., Chen, S.Y., Zeng, A.X.: Virtual-Taobao: virtualizing real-world online retail environment for reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 4902\u20134909 (2019)","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"27_CR32","doi-asserted-by":"crossref","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (1998)","DOI":"10.1109\/TNN.1998.712192"},{"key":"27_CR33","unstructured":"Syed, U., Schapire, R.E.: A game-theoretic approach to apprenticeship learning. In: Advances in Neural Information Processing Systems, vol. 20, pp. 1449\u20131456 (2007)"},{"key":"27_CR34","unstructured":"Syed, U., Schapire, R.E.: A reduction from apprenticeship learning to classification. In: Advances in Neural Information Processing Systems, vol. 23, pp. 2253\u20132261. Citeseer (2010)"},{"key":"27_CR35","unstructured":"Xu, T., Li, Z., Yu, Y.: On value discrepancy of imitation learning. arXiv preprint arXiv:1911.07027 (2019)"},{"key":"27_CR36","unstructured":"Xu, T., Li, Z., Yu, Y.: Error bounds of imitating policies and environments. In: Advances in Neural Information Processing Systems, vol. 33, pp. 15737\u201315749 (2020)"},{"key":"27_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, Y.F., Luo, F.M., Yu, Y.: Improve generated adversarial imitation learning with reward variance regularization. Mach. Learn. 111(3), 977\u2013995 (2022)","DOI":"10.1007\/s10994-021-06083-7"},{"key":"27_CR38","unstructured":"Zhang, Y., Cai, Q., Yang, Z., Wang, Z.: Generative adversarial imitation learning with neural network parameterization: global optimality and convergence rate. In: International Conference on Machine Learning, pp. 11044\u201311054 (2020)"},{"key":"27_CR39","unstructured":"Ziebart, B.D., et al.: Maximum entropy inverse reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 8, pp. 1433\u20131438 (2008)"}],"container-title":["Lecture Notes in Computer Science","Parallel Problem Solving from Nature \u2013 PPSN XVII"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-14714-2_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T16:44:15Z","timestamp":1710261855000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-14714-2_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031147135","9783031147142"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-14714-2_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"14 August 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PPSN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Parallel Problem Solving from Nature","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dortmund","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ppsn2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ppsn2022.cs.tu-dortmund.de\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"185","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"85","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"46% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.75","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.11","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}