{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T12:07:03Z","timestamp":1767182823188,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,14]],"date-time":"2021-12-14T00:00:00Z","timestamp":1639440000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,14]],"date-time":"2021-12-14T00:00:00Z","timestamp":1639440000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,14]]},"DOI":"10.1109\/cdc45484.2021.9683134","type":"proceedings-article","created":{"date-parts":[[2022,2,1]],"date-time":"2022-02-01T20:50:18Z","timestamp":1643748618000},"page":"2880-2887","source":"Crossref","is-referenced-by-count":14,"title":["Learning Accurate Long-term Dynamics for Model-based Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Nathan","family":"Lambert","sequence":"first","affiliation":[{"name":"University of California,Department of Electrical Engineering and Computer Sciences,Berkeley,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Albert","family":"Wilcox","sequence":"additional","affiliation":[{"name":"University of California,Department of Electrical Engineering and Computer Sciences,Berkeley,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Howard","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of California,Department of Electrical Engineering and Computer Sciences,Berkeley,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kristofer S. J.","family":"Pister","sequence":"additional","affiliation":[{"name":"University of California,Department of Electrical Engineering and Computer Sciences,Berkeley,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roberto","family":"Calandra","sequence":"additional","affiliation":[{"name":"Facebook AI Research,Menlo Park,CA,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2012.2206474"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/MMAR.2017.8046794"},{"journal-title":"OpenAI Gym","year":"2016","author":"brockman","key":"ref31"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s10472-015-9463-9"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487144"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CDC.2003.1271710"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICHR.2006.321375"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2007.01.002"},{"key":"ref13","first-page":"465","article-title":"PILCO: A Model-Based and Data-Efficient Approach to Policy Search","author":"deisenroth","year":"2011","journal-title":"International Conference on Machine Learning"},{"key":"ref14","first-page":"12 498","article-title":"When to trust your model: Model-based policy optimization","author":"janner","year":"2019","journal-title":"Neural Information Processing Systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8594193"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2930489"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1002\/047134608X.W1046"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v29i1.9590","article-title":"Improving multi-step prediction of learned time series models","author":"venkatraman","year":"2015","journal-title":"AAAI Conference on Artificial Intelligence"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/RVSP.2011.90"},{"key":"ref28","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"International Conference on Learning Representations"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/0005-1098(89)90002-2"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1512\/iumj.1957.6.56038"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"journal-title":"Combating the compounding-error problem with a multi-step model","year":"2019","author":"asadi","key":"ref6"},{"key":"ref29","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Neural Information Processing Systems"},{"journal-title":"Optimal Control Theory An Introduction","year":"2004","author":"kirk","key":"ref5"},{"key":"ref8","first-page":"761","article-title":"Objective mismatch in model-based reinforcement learning","author":"lambert","year":"2020","journal-title":"Learning for Dynamics and Control"},{"journal-title":"Learning to combat compounding-error in model-based reinforcement learning","year":"2019","author":"xiao","key":"ref7"},{"key":"ref2","article-title":"Deep reinforcement learning in a handful of trials using probabilistic dynamics models","author":"chua","year":"2018","journal-title":"Neural Information Processing Systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s10339-011-0404-1"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989202"},{"journal-title":"Learning unstable dynamical systems with time-weighted logarithmic loss","year":"2020","author":"nar","key":"ref20"},{"key":"ref22","first-page":"227","article-title":"Optimizing long-term predictions for model-based policy search","author":"doerr","year":"2017","journal-title":"Conference on Robot Learning"},{"journal-title":"Learning Dynamics Model in Reinforcement Learning by Incorporating the Long Term Future","year":"2019","author":"ke","key":"ref21"},{"key":"ref24","first-page":"253","article-title":"Using trajectory data to improve bayesian optimization for reinforcement learning","volume":"15","author":"wilson","year":"2014","journal-title":"The Journal of Machine Learning Research"},{"key":"ref23","first-page":"424","article-title":"On simulation and trajectory prediction with gaussian process dynamics","author":"hewing","year":"2020","journal-title":"Learning for Dynamics and Control"},{"journal-title":"World Models","year":"2018","author":"ha","key":"ref26"},{"key":"ref25","first-page":"6571","article-title":"Neural ordinary differential equations","author":"chen","year":"2018","journal-title":"Neural Information Processing Systems"}],"event":{"name":"2021 60th IEEE Conference on Decision and Control (CDC)","start":{"date-parts":[[2021,12,14]]},"location":"Austin, TX, USA","end":{"date-parts":[[2021,12,17]]}},"container-title":["2021 60th IEEE Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9682670\/9682776\/09683134.pdf?arnumber=9683134","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,16]],"date-time":"2023-11-16T18:34:15Z","timestamp":1700159655000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9683134\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,14]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/cdc45484.2021.9683134","relation":{},"subject":[],"published":{"date-parts":[[2021,12,14]]}}}