{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T22:48:38Z","timestamp":1771454918150,"version":"3.50.1"},"reference-count":63,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,1]],"date-time":"2022-10-01T00:00:00Z","timestamp":1664582400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key R&#x0026;D Program of China","award":["2020AAA0107200"],"award-info":[{"award-number":["2020AAA0107200"]}]},{"DOI":"10.13039\/501100001809","name":"NSFC","doi-asserted-by":"crossref","award":["61876077"],"award-info":[{"award-number":["61876077"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Collaborative Innovation Center of Novel Software Technology and Industrialization"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,10,1]]},"DOI":"10.1109\/tpami.2021.3096966","type":"journal-article","created":{"date-parts":[[2021,8,19]],"date-time":"2021-08-19T20:10:46Z","timestamp":1629403846000},"page":"6968-6980","source":"Crossref","is-referenced-by-count":28,"title":["Error Bounds of Imitating Policies and Environments for Reinforcement Learning"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9409-448X","authenticated-orcid":false,"given":"Tian","family":"Xu","sequence":"first","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}]},{"given":"Ziniu","family":"Li","sequence":"additional","affiliation":[{"name":"Shenzhen Research Institute of Big Data, The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"}]},{"given":"Yang","family":"Yu","sequence":"additional","affiliation":[{"name":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}]}],"member":"263","reference":[{"issue":"2\/3","key":"ref1","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1023\/A:1017984413808","article-title":"Near-optimal reinforcement learning in polynomial time","volume":"49","author":"Kearns","year":"2002","journal-title":"Mach. Learn."},{"key":"ref2","first-page":"263","article-title":"Minimax regret bounds for reinforcement learning","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Azar"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref4","volume-title":"Markov Decision Processes: Discrete Stochastic Dynamic Programming","author":"Puterman","year":"2014"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.88"},{"key":"ref6","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Proc. 17th Int. Conf. Mach. Learn.","author":"Ng"},{"key":"ref7","first-page":"4565","article-title":"Generative adversarial imitation learning","volume-title":"Proc. 30th Int. Conf. Neural Inf. Process. Syst.","author":"Ho"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2015.2509024"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/687"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref13","first-page":"1449","article-title":"A game-theoretic approach to apprenticeship learning","volume-title":"Proc. 20th Int. Conf. Neural Inf. Process. Syst.","author":"Syed"},{"key":"ref14","article-title":"Discriminator-actor-critic: Addressing sample inefficiency and reward bias in adversarial imitation learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kostrikov"},{"key":"ref15","article-title":"Imitation learning via off-policy distribution matching","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kostrikov"},{"key":"ref16","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proc. 14th Int. Conf. Artif. Intell. Statist.","author":"Ross"},{"key":"ref17","first-page":"661","article-title":"Efficient reductions for imitation learning","volume-title":"Proc. 13th Int. Conf. Artif. Intell. Statist.","author":"Ross"},{"key":"ref18","first-page":"2914","article-title":"Toward the fundamental limits of imitation learning","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Rajaraman"},{"key":"ref19","article-title":"On computation and generalization of generative adversarial imitation learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen"},{"key":"ref20","first-page":"11044","article-title":"Generative adversarial imitation learning with neural network parameterization: Global optimality and convergence rate","author":"Zhang"},{"key":"ref21","first-page":"9075","article-title":"Is long horizon rl more difficult than short horizon rl?","author":"Wang"},{"key":"ref22","first-page":"3395","article-title":"Open problem: The dependence of sample complexity lower bounds on planning horizon","volume-title":"Proc. 31st Conf. Learn. Theory","author":"Jiang"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9590"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014902"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330933"},{"key":"ref26","article-title":"Algorithmic framework for model-based deep reinforcement learning with theoretical guarantees","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Luo"},{"key":"ref27","first-page":"498","article-title":"When to trust your model: Model-based policy optimization","volume-title":"Proc. 33rd Int. Conf. Neural Inf. Process. Syst.","author":"Janner"},{"key":"ref28","first-page":"13399","article-title":"PC-PG: Policy cover directed exploration for provable policy gradient learning","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Agarwal"},{"key":"ref29","first-page":"2672","article-title":"Generative adversarial nets","volume-title":"Proc. 27th Int. Conf. Neural Inf. Process. Syst.","author":"Goodfellow"},{"key":"ref30","article-title":"Model-ensemble trust-region policy optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kurutach"},{"key":"ref31","first-page":"264","article-title":"Lipschitz continuity in model-based reinforcement learning","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Asadi"},{"key":"ref32","first-page":"2253","article-title":"A reduction from apprenticeship learning to classification","volume-title":"Proc. 23rd Int. Conf. Neural Inf. Process. Syst.","author":"Syed"},{"key":"ref33","article-title":"Learning robust rewards with adverserial inverse reinforcement learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Fu"},{"key":"ref34","first-page":"279","article-title":"Imitation learning from pixel-level demonstrations by hashreward","author":"Cai"},{"key":"ref35","first-page":"10022","article-title":"Of moments and matching: a game-theoretic framework for closing the imitation gap","author":"Swamy"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390286"},{"key":"ref37","first-page":"2760","article-title":"Model-free imitation learning with policy optimization","volume-title":"Proc. 33rd Int. Conf. Mach. Learn.","author":"Ho"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/springerreference_179268"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref40","first-page":"271","article-title":"f-GAN: Training generative neural samplers using variational divergence minimization","volume-title":"Proc. 30th Int. Conf. Neural Inf. Process. Syst.","author":"Nowozin"},{"key":"ref41","first-page":"1259","article-title":"A divergence minimization perspective on imitation learning methods","volume-title":"Proc. 3rd Conf. Robot Learn.","author":"Ghasemipour"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2006.881731"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1561\/0100000004"},{"key":"ref44","volume-title":"Foundations of Machine Learning","author":"Mohri","year":"2012"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781107298019"},{"key":"ref46","first-page":"224","article-title":"Generalization and equilibrium in generative adversarial nets (GANs)","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Arora"},{"key":"ref47","article-title":"On the discrimination-generalization tradeoff in GANs","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhang"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.2307\/1428011"},{"key":"ref49","first-page":"214","article-title":"Wasserstein generative adversarial networks","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Arjovsky"},{"key":"ref50","first-page":"6240","article-title":"Spectrally-normalized margin bounds for neural networks","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Bartlett"},{"key":"ref51","first-page":"5767","article-title":"Improved training of wasserstein GANs","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Gulrajani"},{"key":"ref52","article-title":"Deep variational information bottleneck","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Alemi"},{"key":"ref53","article-title":"Variational discriminator bottleneck: Improving imitation learning, inverse RL, and GANs by constraining information flow","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Peng"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1214\/aos\/1017939142"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-1880-7_29"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-013-5368-1"},{"key":"ref57","first-page":"1889","article-title":"Trust region policy optimization","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","author":"Schulman"},{"key":"ref58","article-title":"OpenAI Gym","author":"Brockman","year":"2016"},{"key":"ref59","first-page":"1856","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Haarnoja"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/820"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/425"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511921889"},{"key":"ref63","first-page":"895","article-title":"Concentration inequalities for the missing mass and for histogram rule error","volume":"4","author":"McAllester","year":"2003","journal-title":"J. Mach. Learn. Res."}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9893033\/09485061.pdf?arnumber=9485061","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:08:16Z","timestamp":1705018096000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9485061\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,1]]},"references-count":63,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2021.3096966","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,1]]}}}