{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T16:18:17Z","timestamp":1761581897128,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030286187"},{"type":"electronic","value":"9783030286194"}],"license":[{"start":{"date-parts":[[2019,11,28]],"date-time":"2019-11-28T00:00:00Z","timestamp":1574899200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-28619-4_34","type":"book-chapter","created":{"date-parts":[[2019,11,28]],"date-time":"2019-11-28T00:04:15Z","timestamp":1574899455000},"page":"437-453","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["AdaPT: Zero-Shot Adaptive Policy Transfer for Stochastic Dynamical Systems"],"prefix":"10.1007","author":[{"given":"James","family":"Harrison","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Animesh","family":"Garg","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Boris","family":"Ivanovic","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuke","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Silvio","family":"Savarese","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Fei-Fei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marco","family":"Pavone","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,11,28]]},"reference":[{"key":"34_CR1","doi-asserted-by":"crossref","unstructured":"Abbeel, P., Quigley, M., Ng, A.Y.: Using inaccurate models in reinforcement learning. In: Proceedings of the 23rd International Conference on Machine Learning. ACM (2006)","DOI":"10.1145\/1143844.1143845"},{"key":"34_CR2","unstructured":"Bousmalis, K., Irpan, A., Wohlhart, P., Bai, Y., Kelcey, M., Kalakrishnan, M., Downs, L., Ibarz, J., Pastor, P., Konolige, K., et al.: Using simulation and domain adaptation to improve efficiency of deep robotic grasping (2017). \narXiv:1709.07857"},{"key":"34_CR3","unstructured":"Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., Zaremba, W.: Openai gym (2016). \narXiv:1606.01540"},{"key":"34_CR4","unstructured":"Christiano, P., Shah, Z., Mordatch, I., Schneider, J., Blackwell, T., Tobin, J., Abbeel, P., Zaremba, W.: Transfer from simulation to real world through learning deep inverse dynamics model (2016). \narXiv:1610.03518"},{"key":"34_CR5","unstructured":"Deisenroth, M., Rasmussen, C.E.: Pilco: a model-based and data-efficient approach to policy search. In: Proceedings of the 28th International Conference on Machine Learning (ICML-11) (2011)"},{"key":"34_CR6","unstructured":"Devin, C., Gupta, A., Darrell, T., Abbeel, P., Levine, S.: Learning modular neural network policies for multi-task and multi-robot transfer (2016). \narXiv:1609.07088"},{"key":"34_CR7","unstructured":"Farshidian, F., Pardo, D., Buchli, J.: Sequential linear quadratic optimal control for nonlinear switched systems (2016). \narXiv:1609.02198"},{"key":"34_CR8","unstructured":"Gu, S., Lillicrap, T., Sutskever, I., Levine, S.: Continuous deep Q-learning with modelbased acceleration. ICML (2016)"},{"key":"34_CR9","unstructured":"Heess, N., Wayne, G., Silver, D., Lillicrap, T., Erez, T., Tassa, Y.: Learning continuous control policies by stochastic value gradients. In: NIPS (2015)"},{"key":"34_CR10","unstructured":"Kahn, G., Zhang, T., Levine, S., Abbeel, P.: Plato: policy learning using adaptive trajectory optimization (2016). \narXiv:1603.00622"},{"key":"34_CR11","unstructured":"Kober, J., Bagnell, J.A., Peters, J.: Reinforcement learning in robotics: a survey. Int. J. Robot. Res. 0 278 364 913 495 721 (2013)"},{"key":"34_CR12","unstructured":"Levine, S., Abbeel, P.: Learning neural network policies with guided policy search under unknown dynamics. In: Advances in Neural Information Processing Systems (2014)"},{"issue":"39","key":"34_CR13","first-page":"1","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., Abbeel, P.: End-to-end training of deep visuomotor policies. J. Mach. Learn. Res. 17(39), 1\u201340 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"34_CR14","doi-asserted-by":"crossref","unstructured":"Mandlekar*, A., Zhu*, Y., Garg*, A., Fei-Fei, L., Savarese, S.: Adversarially robust policy learning through active construction of physically-plausible perturbations. In: IEEE International Conference on Intelligent Robots and Systems (IROS) (2017) (* equal contribution)","DOI":"10.1109\/IROS.2017.8206245"},{"key":"34_CR15","doi-asserted-by":"crossref","unstructured":"Mayne, D.Q., Kerrigan, E.C., Van Wyk, E., Falugi, P.: Tube-based robust nonlinear model predictive control. Int. J. Robust Nonlinear Control (2011)","DOI":"10.1002\/rnc.1758"},{"key":"34_CR16","doi-asserted-by":"crossref","unstructured":"Mitrovic, D., Klanke, S., Vijayakumar, S.: Adaptive optimal feedback control with learned internal dynamics models. In: From Motor Learning to Interaction Learning in Robots, pp. 65\u201384. Springer, New York (2010)","DOI":"10.1007\/978-3-642-05181-4_4"},{"issue":"7540","key":"34_CR17","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., Kavukcuoglu, K., Silver, D., Rusu, A.A., Veness, J., Bellemare, M.G., Graves, A., Riedmiller, M., Fidjeland, A.K., Ostrovski, G., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"34_CR18","unstructured":"Moerland, T., Broekens, J., Jonker, C.: Learning multimodal transition dynamics for model-based reinforcement learning (2017). \narXiv:1705.00470"},{"key":"34_CR19","doi-asserted-by":"crossref","unstructured":"Mordatch, I., Lowrey, K., Todorov, E.: Ensemble-cio: full-body dynamic motion planning that transfers to physical humanoids. In: 2015 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 5307\u20135314. IEEE (2015)","DOI":"10.1109\/IROS.2015.7354126"},{"key":"34_CR20","doi-asserted-by":"crossref","unstructured":"Neunert, M., de Crousaz, C., Furrer, F., Kamel, M., Farshidian, F., Siegwart, R., Buchli, J.: Fast nonlinear model predictive control for unified trajectory optimization and tracking. In: Proceedings of the IEEE Conference on Robotics and Automation (2016)","DOI":"10.1109\/ICRA.2016.7487274"},{"key":"34_CR21","unstructured":"Peng, X.B., Andrychowicz, M., Zaremba, W., Abbeel, P.: Sim-to-real transfer of robotic control with dynamics randomization (2017). \narXiv:1710.06537"},{"key":"34_CR22","unstructured":"Pinto, L., Davidson, J., Sukthankar, R., Gupta, A.: Robust adversarial reinforcement learning (2017). \narXiv:1703.02702"},{"key":"34_CR23","unstructured":"Rajeswaran, A., Ghotra, S., Levine, S., Ravindran, B.: EPOpt: learning robust neural network policies using model ensembles (2016). \narXiv:1610.01283"},{"key":"34_CR24","unstructured":"Rusu, A.A., Rabinowitz, N.C., Desjardins, G., Soyer, H., Kirkpatrick, J., Kavukcuoglu, K., Pascanu, R., Hadsell, R.: Progressive neural networks (2016). \narXiv:1606.04671"},{"key":"34_CR25","unstructured":"Schulman, J., Levine, S., Moritz, P., Jordan, M., Abbeel, P.: Trust region policy optimization. In: ICML (2015)"},{"key":"34_CR26","doi-asserted-by":"crossref","unstructured":"Singh, S., Majumdar, A., Slotine, J.-J., Pavone, M.: Robust online motion planning via contraction theory and convex optimization. In: Proceedings of the IEEE Conference on Robotics and Automation (2017)","DOI":"10.1109\/ICRA.2017.7989693"},{"key":"34_CR27","doi-asserted-by":"crossref","unstructured":"Tassa, Y., Erez, T., Todorov, E.: Synthesis and stabilization of complex behaviors through online trajectory optimization. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 4906\u20134913. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386025"},{"key":"34_CR28","doi-asserted-by":"crossref","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: a survey. J. Mach. Learn. Res. 10 (2009)","DOI":"10.1007\/978-3-642-01882-4_2"},{"key":"34_CR29","unstructured":"Tobin, J., Fong, R., Ray, A., Schneider, J., Zaremba, W., Abbeel, P.: Domain randomization for transferring deep neural networks from simulation to the real world (2017). \narXiv:1703.06907"},{"key":"34_CR30","unstructured":"Todorov, E., Li, W.: A generalized iterative LQG method for locally-optimal feedback control of constrained nonlinear stochastic systems. In: Proceedings of the 2005 American Control Conference, pp. 300-306. IEEE (2005)"},{"key":"34_CR31","doi-asserted-by":"crossref","unstructured":"Webb, D.J., van den Berg, J.: Kinodynamic RRT*: asymptotically optimal motion planning for robots with linear dynamics. In: IEEE International Conference on Robotics and Automation (ICRA) (2013)","DOI":"10.1109\/ICRA.2013.6631299"},{"key":"34_CR32","unstructured":"Zhou, K., Doyle, J.C., Glover, K., et al.: Robust and Optimal Control, vol. 40 (1996)"}],"container-title":["Springer Proceedings in Advanced Robotics","Robotics Research"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-28619-4_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,9]],"date-time":"2020-04-09T21:15:23Z","timestamp":1586466923000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-28619-4_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,28]]},"ISBN":["9783030286187","9783030286194"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-28619-4_34","relation":{},"ISSN":["2511-1256","2511-1264"],"issn-type":[{"type":"print","value":"2511-1256"},{"type":"electronic","value":"2511-1264"}],"subject":[],"published":{"date-parts":[[2019,11,28]]},"assertion":[{"value":"28 November 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}