{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T23:07:24Z","timestamp":1743116844021,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819787043"},{"type":"electronic","value":"9789819787050"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8705-0_9","type":"book-chapter","created":{"date-parts":[[2025,2,7]],"date-time":"2025-02-07T14:37:45Z","timestamp":1738939065000},"page":"120-150","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Offline Imitation Learning by\u00a0Controlling the\u00a0Effective Planning Horizon"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0389-7270","authenticated-orcid":false,"given":"Hee-Jun","family":"Ahn","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5743-4485","authenticated-orcid":false,"given":"Seong-Woong","family":"Shim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0684-607X","authenticated-orcid":false,"given":"Byung-Jun","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,8]]},"reference":[{"key":"9_CR1","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., Levine, S.: D4rl: datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)"},{"key":"9_CR2","unstructured":"Gulrajani, I., Ahmed, F., Arjovsky, M., Dumoulin, V., Courville, A.C.: Improved training of Wasserstein GANs. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"9_CR3","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"9_CR4","unstructured":"Hu, H., Yang, Y., Zhao, Q., Zhang, C.: On the role of discount factor in offline reinforcement learning. In: International Conference on Machine Learning, pp. 9072\u20139098. PMLR (2022)"},{"key":"9_CR5","unstructured":"Janner, M., Fu, J., Zhang, M., Levine, S.: When to trust your model: model-based policy optimization. Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"9_CR6","unstructured":"Jiang, N., Kulesza, A., Singh, S., Lewis, R.: The dependence of effective planning horizon on model accuracy. In: Proceedings of the 2015 International Conference on Autonomous Agents and Multiagent Systems, pp. 1181\u20131189. Citeseer (2015)"},{"key":"9_CR7","series-title":"Springer Proceedings in Advanced Robotics","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1007\/978-3-030-66723-8_19","volume-title":"Algorithmic Foundations of Robotics XIV","author":"L Ke","year":"2021","unstructured":"Ke, L., Choudhury, S., Barnes, M., Sun, W., Lee, G., Srinivasa, S.: Imitation learning as f-Divergence minimization. In: LaValle, S.M., Lin, M., Ojala, T., Shell, D., Yu, J. (eds.) WAFR 2020. SPAR, vol. 17, pp. 313\u2013329. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-66723-8_19"},{"key":"9_CR8","first-page":"8252","volume":"35","author":"GH Kim","year":"2022","unstructured":"Kim, G.H., Lee, J., Jang, Y., Yang, H., Kim, K.E.: LobsDICE: offline learning from observation via stationary distribution correction estimation. Adv. Neural. Inf. Process. Syst. 35, 8252\u20138264 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR9","unstructured":"Kim, G.H., et al.: DemoDICE: offline imitation learning with supplementary imperfect demonstrations. In: International Conference on Learning Representations (2021)"},{"key":"9_CR10","unstructured":"Kostrikov, I., Agrawal, K.K., Dwibedi, D., Levine, S., Tompson, J.: Discriminator-actor-critic: addressing sample inefficiency and reward bias in adversarial imitation learning. arXiv preprint arXiv:1809.02925 (2018)"},{"key":"9_CR11","unstructured":"Kostrikov, I., Nachum, O., Tompson, J.: Imitation learning via off-policy distribution matching. arXiv preprint arXiv:1912.05032 (2019)"},{"key":"9_CR12","unstructured":"Lee, B., Lee, J., Vrancx, P., Kim, D., Kim, K.E.: Batch reinforcement learning with hyperparameter gradients. In: International Conference on Machine Learning, pp. 5725\u20135735. PMLR (2020)"},{"key":"9_CR13","unstructured":"Lee, J., Jeon, W., Lee, B., Pineau, J., Kim, K.E.: OptiDICE: offline policy optimization via stationary distribution correction estimation. In: International Conference on Machine Learning, pp. 6120\u20136130. PMLR (2021)"},{"key":"9_CR14","unstructured":"Li, Z., Xu, T., Yu, Y., Luo, Z.Q.: Rethinking valueDice: does it really improve performance? arXiv preprint arXiv:2202.02468 (2022)"},{"key":"9_CR15","unstructured":"Ma, Y., Shen, A., Jayaraman, D., Bastani, O.: Versatile offline imitation from observations and examples via regularized state-occupancy matching. In: International Conference on Machine Learning, pp. 14639\u201314663. PMLR (2022)"},{"key":"9_CR16","unstructured":"Ng, A.Y., Russell, S., et\u00a0al.: Algorithms for inverse reinforcement learning. In: International Conference on Machine Learning, vol.\u00a01, p.\u00a02 (2000)"},{"key":"9_CR17","unstructured":"Petrik, M., Scherrer, B.: Biasing approximate dynamic programming with a lower discount factor. In: Advances in Neural Information Processing Systems, vol. 21 (2008)"},{"issue":"1","key":"9_CR18","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1162\/neco.1991.3.1.88","volume":"3","author":"DA Pomerleau","year":"1991","unstructured":"Pomerleau, D.A.: Efficient training of artificial neural networks for autonomous navigation. Neural Comput. 3(1), 88\u201397 (1991)","journal-title":"Neural Comput."},{"key":"9_CR19","unstructured":"Ross, S., Gordon, G., Bagnell, D.: A reduction of imitation learning and structured prediction to no-regret online learning. In: Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics, pp. 627\u2013635. JMLR Workshop and Conference Proceedings (2011)"},{"key":"9_CR20","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386109"},{"key":"9_CR21","unstructured":"Torabi, F., Warnell, G., Stone, P.: Generative adversarial imitation from observation. arXiv preprint arXiv:1807.06158 (2018)"},{"key":"9_CR22","unstructured":"Wu, Y., Tucker, G., Nachum, O.: Behavior regularized offline reinforcement learning. CoRR abs\/1911.11361 (2019)"},{"key":"9_CR23","first-page":"15737","volume":"33","author":"T Xu","year":"2020","unstructured":"Xu, T., Li, Z., Yu, Y.: Error bounds of imitating policies and environments. Adv. Neural. Inf. Process. Syst. 33, 15737\u201315749 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8705-0_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,7]],"date-time":"2025-02-07T14:38:08Z","timestamp":1738939088000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8705-0_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819787043","9789819787050"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8705-0_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"8 February 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPRAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition and Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jeju Island","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 June 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 June 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icprai2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/brain.korea.ac.kr\/icprai2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}