{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T11:05:39Z","timestamp":1769166339273,"version":"3.49.0"},"reference-count":95,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10611492","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"16945-16953","source":"Crossref","is-referenced-by-count":6,"title":["Deep Model Predictive Optimization"],"prefix":"10.1109","author":[{"given":"Jacob","family":"Sacks","sequence":"first","affiliation":[{"name":"University of Washington,Seattle,WA,USA,98105"}]},{"given":"Rwik","family":"Rana","sequence":"additional","affiliation":[{"name":"University of Washington,Seattle,WA,USA,98105"}]},{"given":"Kevin","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Washington,Seattle,WA,USA,98105"}]},{"given":"Alex","family":"Spitzer","sequence":"additional","affiliation":[{"name":"University of Washington,Seattle,WA,USA,98105"}]},{"given":"Guanya","family":"Shi","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University,Robotics Institute,Pittsburgh,PA,USA,15213"}]},{"given":"Byron","family":"Boots","sequence":"additional","affiliation":[{"name":"University of Washington,Seattle,WA,USA,98105"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Learning Dexterous In-Hand Manipulation","author":"OpenAI","year":"2018"},{"key":"ref2","article-title":"DATT: Deep Adaptive Trajectory Tracking for Quadrotor Control","volume-title":"Conference on Robot Learning (CoRL)","author":"Huang"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06419-4"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abm6597"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794351"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref8","article-title":"Understanding Domain Randomization for Sim-to-Real Transfer","author":"Chen","year":"2021"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/S0098-1354(98)00301-9"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989202"},{"key":"ref11","first-page":"1994","article-title":"The power of predictions in online control","volume":"33","author":"Yu","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.033"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/9481.003.0015"},{"key":"ref14","first-page":"1181","article-title":"The Dependence of Effective Planning Horizon on Model Accuracy","volume-title":"Proceedings of the 2015 International Conference on Autonomous Agents and Multiagent Systems","author":"Jiang"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989043"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3508038"},{"key":"ref17","first-page":"1205","article-title":"Optimal Cost Design for Model Predictive Control","volume-title":"Learning for Dynamics and Control","author":"Jain","year":"2021"},{"key":"ref18","article-title":"Proximal Policy Optimization Algorithms","author":"Schulman","year":"2017"},{"key":"ref19","first-page":"1889","article-title":"Trust Region Policy Optimization","volume-title":"International Conference on Machine Learning (ICML)","author":"Schulman"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487277"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812369"},{"key":"ref22","first-page":"3981","article-title":"Learning to learn by gradient descent by gradient descent","author":"Andrychowicz","year":"2016","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref23","article-title":"Optimization as a Model for Few-Shot Learning","volume-title":"International Conference on Learning Representations (ICLR)","author":"Ravi"},{"key":"ref24","first-page":"2247","article-title":"Learning Gradient Descent: Better Generalization and Longer Horizons","volume-title":"International Conference on Machine Learning (ICML)","author":"Lv"},{"key":"ref25","article-title":"Training Stronger Baselines for Learning to Optimize","volume":"33","author":"Chen","year":"2020","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref26","article-title":"Bootstrapped Meta-Learning","author":"Flennerhag","year":"2021"},{"key":"ref27","first-page":"9807","article-title":"Learning to Generalize Provably in Learning to Optimize","volume-title":"International Conference on Artificial Intelligence and Statistics","author":"Yang"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20050-2_23"},{"key":"ref29","article-title":"Training Learned Optimizers with Randomly Initialized Learned Optimizers","author":"Metz","year":"2021"},{"key":"ref30","article-title":"Tasks, Stability, Architecture, and Compute: Training More Effective Learned Optimizers, and Using Them to Train Themselves","author":"Metz","year":"2020"},{"key":"ref31","first-page":"3751","article-title":"Learned Optimizers that Scale and Generalize","volume-title":"International Conference on Machine Learning (ICML)","author":"Wichrowska"},{"key":"ref32","first-page":"4556","article-title":"Understanding and Correcting Pathologies in the Training of Learned Optimizers","volume-title":"International Conference on Machine Learning (ICML)","author":"Metz"},{"key":"ref33","article-title":"Learning to Optimize","author":"Li","year":"2016"},{"key":"ref34","article-title":"Learning to Optimize Neural Nets","author":"Li","year":"2017"},{"key":"ref35","first-page":"16 455","article-title":"Discovered Policy Optimisation","volume":"35","author":"Lu","year":"2022","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref36","first-page":"1060","article-title":"Discovering Reinforcement Learning Algorithms","volume":"33","author":"Oh","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref37","article-title":"Improving Generalization in Meta Reinforcement Learning Using Learned Objectives","author":"Kirsch","year":"2019"},{"key":"ref38","article-title":"Evolved Policy Gradients","volume":"31","author":"Houthooft","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref39","article-title":"Evolving Reinforcement Learning Algorithms","author":"Co-Reyes","year":"2021"},{"key":"ref40","article-title":"Learning to reinforcement learn","author":"Wang","year":"2016"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460656"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2015.xi.012"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.23919\/ACC.2004.1383790"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759592"},{"key":"ref45","article-title":"Deep Reinforcement Learning in a Handful of Trials using Probabilistic Dynamics Models","author":"Chua","year":"2018"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463189"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989324"},{"key":"ref48","article-title":"From Pixels to Torques: Policy Learning with Deep Dynamical Models","author":"Wahlstr\u00f6m","year":"2015"},{"key":"ref49","article-title":"Embed to Control: A Locally Linear Latent Dynamics Model for Control from Raw Images","author":"Watter","year":"2015"},{"key":"ref50","first-page":"1751","article-title":"Robust Locally-Linear Controllable Embedding","volume-title":"International Conference on Artificial Intelligence and Statistics (AISTATS)","author":"Banijamali"},{"key":"ref51","article-title":"Visual Foresight: Model-Based Deep Reinforcement Learning for Vision-Based Robotic Control","author":"Ebert","year":"2018"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1088\/1742-5468\/ab3455"},{"key":"ref53","first-page":"2555","article-title":"Learning Latent Dynamics for Planning from Pixels","volume-title":"International Conference on Machine Learning (ICML)","author":"Hafner"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2013.6614995"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2017.2753460"},{"key":"ref56","article-title":"Plan Online, Learn Offline: Efficient Learning and Exploration via Model-Based Control","author":"Lowrey","year":"2018"},{"key":"ref57","article-title":"Blending MPC & Value Function Approximation for Efficient Reinforcement Learning","author":"Bhardwaj","year":"2020"},{"key":"ref58","first-page":"840","article-title":"Information Theoretic Model Predictive Q-Learning","volume-title":"Learning for Dynamics & Control (L4DC)","author":"Bhardwaj"},{"key":"ref59","first-page":"291","article-title":"The Differentiable Cross-Entropy Method","volume-title":"International Conference on Machine Learning (ICML)","author":"Amos"},{"key":"ref60","article-title":"QMDP-Net: Deep Learning for Planning under Partial Observability","author":"Karkus","year":"2017"},{"key":"ref61","article-title":"Path Integral Networks: End-to-End Differentiable Optimal Control","author":"Okada","year":"2017"},{"key":"ref62","article-title":"Differentiable MPC for End-to-end Planning and Control","author":"Amos","year":"2018"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463164"},{"key":"ref64","article-title":"MPC-Inspired Neural Network Policies for Sequential Decision Making","author":"Pereira","year":"2018"},{"key":"ref65","article-title":"Variational Inference MPC for Robot Motion with Normalizing Flows","author":"Power","year":"2021","journal-title":"Advances in Neural Information Processing Systems (NeurIPS) Workshop on Robot Learning: Self-Supervised and Lifelong Learning"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.027"},{"key":"ref67","first-page":"1733","article-title":"Learning Sampling Distributions for Model Predictive Control","volume-title":"Conference on Robot Learning (CoRL)","author":"Sacks"},{"key":"ref68","first-page":"258","article-title":"Variational Inference MPC for Bayesian Model-based Reinforcement Learning","volume-title":"Conference on Robot Learning (CoRL)","author":"Okada"},{"key":"ref69","article-title":"Stein Variational Model Predictive Control","author":"Lambert","year":"2020"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/tro.2024.3428428"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160929"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811615"},{"key":"ref73","article-title":"Towards Efficient MPPI Trajectory Generation with Unscented Guidance: U-MPPI Control Strategy","author":"Mohamed","year":"2023"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/icra57147.2024.10611021"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636163"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794127"},{"key":"ref77","article-title":"Residual Policy Learning","author":"Silver","year":"2018"},{"key":"ref78","first-page":"770","article-title":"Continuous Versatile Jumping Using Learned Action Residuals","volume-title":"Learning for Dynamics & Control (L4DC)","author":"Yang"},{"key":"ref79","article-title":"Cajun: Continuous adaptive jumping using a learned centroidal controller","author":"Yang","year":"2023"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341154"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2965911"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3131690"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2022.3177279"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2020.XVI.040"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1017\/S0263574719000158"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967695"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160836"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2017.2720851"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811564"},{"key":"ref90","article-title":"Actor-Critic Model Predictive Control","author":"Romero","year":"2023"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2022.3141602"},{"key":"ref92","first-page":"8026","article-title":"PyTorch: An Imperative Style, High-Performance Deep Learning Library","volume":"32","author":"Paszke","year":"2019","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)"},{"key":"ref93","article-title":"High-Dimensional Continuous Control Using Generalized Advantage Estimation","author":"Schulman","year":"2015"},{"key":"ref94","article-title":"Adam: A Method for Stochastic Optimization","author":"Kingma","year":"2014"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1145\/355588.365104"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","location":"Yokohama, Japan","start":{"date-parts":[[2024,5,13]]},"end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10611492.pdf?arnumber=10611492","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,11]],"date-time":"2024-08-11T04:12:46Z","timestamp":1723349566000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10611492\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":95,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10611492","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}