{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T18:35:52Z","timestamp":1775327752997,"version":"3.50.1"},"reference-count":198,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,11,1]],"date-time":"2021-11-01T00:00:00Z","timestamp":1635724800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,11,1]],"date-time":"2021-11-01T00:00:00Z","timestamp":1635724800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003141","name":"Consejo Nacional de Ciencia y Tecnolog\u00eda","doi-asserted-by":"crossref","award":["http:\/\/dx.doi.org\/10.13039\/501100003141"],"award-info":[{"award-number":["http:\/\/dx.doi.org\/10.13039\/501100003141"]}],"id":[{"id":"10.13039\/501100003141","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Intel Serv Robotics"],"published-print":{"date-parts":[[2021,11]]},"DOI":"10.1007\/s11370-021-00398-z","type":"journal-article","created":{"date-parts":[[2021,11,16]],"date-time":"2021-11-16T19:04:01Z","timestamp":1637089441000},"page":"773-805","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":91,"title":["A survey on deep learning and deep reinforcement learning in robotics with a tutorial on deep reinforcement learning"],"prefix":"10.1007","volume":"14","author":[{"given":"Eduardo F.","family":"Morales","sequence":"first","affiliation":[]},{"given":"Rafael","family":"Murrieta-Cid","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9788-1128","authenticated-orcid":false,"given":"Israel","family":"Becerra","sequence":"additional","affiliation":[]},{"given":"Marco A.","family":"Esquivel-Basaldua","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,11,16]]},"reference":[{"issue":"11","key":"398_CR1","doi-asserted-by":"publisher","first-page":"2724","DOI":"10.1016\/j.automatica.2008.03.027","volume":"44","author":"A Abate","year":"2008","unstructured":"Abate A, Prandini M, Lygeros J, Sastry S (2008) Probabilistic reachability and safety for controlled discrete time stochastic hybrid systems. Automatica 44(11):2724\u20132734","journal-title":"Automatica"},{"issue":"2","key":"398_CR2","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1109\/LRA.2020.2966414","volume":"5","author":"A Amini","year":"2020","unstructured":"Amini A, Gilitschenski I, Phillips J, Moseyko J, Banerjee R, Karaman S, Rus D (2020) Learning robust control policies for end-to-end autonomous driving from data-driven simulation. IEEE Robot Autom Lett RA-L 5(2):1143\u20131150","journal-title":"IEEE Robot Autom Lett RA-L"},{"key":"398_CR3","doi-asserted-by":"crossref","unstructured":"Amini A, Rosman G, Karaman S, Rus D (2019) Variational end-to-end navigation and localization. In: IEEE international conference on robotics and automation (ICRA), pp 8958\u20138964","DOI":"10.1109\/ICRA.2019.8793579"},{"key":"398_CR4","unstructured":"Andrychowicz M, Wolski F, Ray A, Schneider J, Fong R, Welinder P, McGrew B, Tobin J, Pieter Abbeel O, Zaremba W (2017) Hindsight experience replay, pp 5048\u20135058"},{"key":"398_CR5","unstructured":"Asseman A, Kornuta T, Ozcan A (2018) Learning beyond simulated physics"},{"issue":"12","key":"398_CR6","doi-asserted-by":"publisher","first-page":"2481","DOI":"10.1109\/TPAMI.2016.2644615","volume":"39","author":"V Badrinarayanan","year":"2017","unstructured":"Badrinarayanan V, Kendall A, Cipolla R (2017) Segnet: a deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans Pattern Anal Mach Intell 39(12):2481\u20132495","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"398_CR7","first-page":"1342","volume":"28","author":"E Bareinboim","year":"2015","unstructured":"Bareinboim E, Forney A, Pearl J (2015) Bandits with unobserved confounders: a causal approach. Adv Neural Inf Process Syst NIPS 28:1342\u20131350","journal-title":"Adv Neural Inf Process Syst NIPS"},{"key":"398_CR8","unstructured":"Barth-Maron G, Hoffman MW, Budden D, Dabney W, Horgan D, Tb D, Muldal A, Heess N, Lillicrap T (2018) Distributed distributional deterministic policy gradients. arXiv preprint arXiv:1804.08617"},{"key":"398_CR9","unstructured":"Bellemare MG, Dabney W, Munos R (2017) A distributional perspective on reinforcement learning. In: International conference on machine learning (ICML), pp 449\u2013458"},{"key":"398_CR10","doi-asserted-by":"crossref","unstructured":"Bengio Y, Louradour J, Collobert R, Weston J (2009) Curriculum learning. In: International conference on machine learning (ICML), pp 41\u201348","DOI":"10.1145\/1553374.1553380"},{"key":"398_CR11","unstructured":"Bojarski M, Del\u00a0Testa D, Dworakowski D, Firner B, Flepp B, Goyal P, Jackel LD, Monfort M, Muller U, Zhang J et\u00a0al (2016) End to end learning for self-driving cars. arXiv preprint arXiv:1604.07316"},{"key":"398_CR12","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) OpenAI gym. arXiv preprint arXiv:1606.01540"},{"key":"398_CR13","doi-asserted-by":"crossref","unstructured":"Cabi S, Colmenarejo SG, Novikov A, Konyushkova K, Reed S, Jeong R, Zolna K, Aytar Y, Budden D, Vecerik M et\u00a0al (2019) Scaling data-driven robotics with reward sketching and batch reinforcement learning. arXiv preprint arXiv:1909.12200","DOI":"10.15607\/RSS.2020.XVI.076"},{"issue":"3","key":"398_CR14","first-page":"4218","volume":"5","author":"P Cai","year":"2020","unstructured":"Cai P, Wang S, Sun Y, Liu M (2020) Probabilistic end-to-end vehicle navigation in complex dynamic environments with multimodal sensor fusion. IEEE Robot Autom Lett 5(3):4218\u20134224","journal-title":"IEEE Robot Autom Lett"},{"key":"398_CR15","doi-asserted-by":"crossref","unstructured":"Caicedo JC, Lazebnik S (2015) Active object localization with deep reinforcement learning. In: International conference on computer vision, pp 2488\u20132496","DOI":"10.1109\/ICCV.2015.286"},{"key":"398_CR16","unstructured":"Caicedo JC, Lazebnik S (2017) Action-decision networks for visual tracking with deep reinforcement learning. In: IEEE international conference on imaging, vision and pattern recognition, pp 2711\u20132720"},{"key":"398_CR17","doi-asserted-by":"crossref","unstructured":"Calli B, Singh A, Walsman A, Srinivasa S, Abbeel P, Dollar AM (2015) The YCB object and model set: towards common benchmarks for manipulation research. In: International conference on advanced robotics (ICAR), pp 510\u2013517","DOI":"10.1109\/ICAR.2015.7251504"},{"key":"398_CR18","unstructured":"Campos V, Trott A, Xiong C, Socher R, Gir\u00f3-i Nieto X, Torres J (2020) Explore, discover and learn: Unsupervised discovery of state-covering skills. In: International conference on machine learning. PMLR, pp 1317\u20131327"},{"key":"398_CR19","doi-asserted-by":"crossref","unstructured":"Canny J (1988) Some algebraic and geometric computations in PSPACE. In: ACM symposium on theory of computing (STOC), pp 460\u2013467","DOI":"10.1145\/62212.62257"},{"key":"398_CR20","doi-asserted-by":"crossref","unstructured":"Canny J, Reif J (1987) New lower bound techniques for robot motion planning problems. In: IEEE symposium on foundations of computer science (FOCS), pp 49\u201360","DOI":"10.1109\/SFCS.1987.42"},{"key":"398_CR21","doi-asserted-by":"crossref","unstructured":"Chebotar Y, Handa A, Makoviychuk V, Macklin M, Issac J, Ratliff N, Fox D (2019) Closing the sim-to-real loop: adapting simulation randomization with real world experience. In: IEEE international conference on robotics and automation (ICRA), pp 8973\u20138979","DOI":"10.1109\/ICRA.2019.8793789"},{"key":"398_CR22","unstructured":"Chen B, Dai B, Lin Q, Ye G, Liu H, Song L (2019) Learning to plan in high dimensions via neural exploration-exploitation trees. In: International conference on learning representations (ICLR)"},{"key":"398_CR23","doi-asserted-by":"crossref","unstructured":"Chen T, Zhai X, Ritter M, Lucic M, Houlsby N (2019) Self-supervised GANs via auxiliary rotation loss. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 12154\u201312163","DOI":"10.1109\/CVPR.2019.01243"},{"key":"398_CR24","unstructured":"Chen X, Fan H, Girshick R, He K (2020) Improved baselines with momentum contrastive learning. arXiv preprint arXiv:2003.04297"},{"key":"398_CR25","doi-asserted-by":"crossref","unstructured":"Chiang HT, Malone N, Lesser K, Oishi M, Tapia L (2015) Aggressive moving obstacle avoidance using a stochastic reachable set based potential field. In: International workshop on the algorithmic foundations of robotics (WAFR), pp 73\u201389","DOI":"10.1007\/978-3-319-16595-0_5"},{"key":"398_CR26","doi-asserted-by":"crossref","unstructured":"Chiang HTL, Faust A, Sugaya S, Tapia L (2018) Fast swept volume estimation with deep learning. In: International workshop on the algorithmic foundations of robotics (WAFR), pp 52\u201368","DOI":"10.1007\/978-3-030-44051-0_4"},{"key":"398_CR27","doi-asserted-by":"publisher","first-page":"2007","DOI":"10.1109\/LRA.2019.2899918","volume":"56","author":"HTL Chiang","year":"2019","unstructured":"Chiang HTL, Faust M, Fiser M, Frances A (2019) Learning navigation behaviors end to end with auto-RL. IEEE Robot Autom Lett 56:2007\u20132014","journal-title":"IEEE Robot Autom Lett"},{"issue":"4","key":"398_CR28","doi-asserted-by":"publisher","first-page":"4298","DOI":"10.1109\/LRA.2019.2931199","volume":"4","author":"HTL Chiang","year":"2019","unstructured":"Chiang HTL, Hsu J, Fiser M, Tapia L, Faust A (2019) RL-RRT: kinodynamic motion planning via learning reachability estimators from RL policies. IEEE Robot Autom Lett 4(4):4298\u20134305","journal-title":"IEEE Robot Autom Lett"},{"key":"398_CR29","doi-asserted-by":"crossref","unstructured":"Codevilla F, Miiller M, L\u00f3pez A, Koltun V, Dosovitskiy A (2018) End-to-end driving via conditional imitation learning. In: IEEE international conference on robotics and automation (ICRA), pp 1\u20139","DOI":"10.1109\/ICRA.2018.8460487"},{"issue":"5","key":"398_CR30","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1038\/s42256-019-0050-3","volume":"1","author":"M Crosby","year":"2019","unstructured":"Crosby M, Beyret B, Halina M (2019) The animal-ai olympics. Nat Mach Intell 1(5):257\u2013257","journal-title":"Nat Mach Intell"},{"key":"398_CR31","unstructured":"Dasari S, Ebert F, Tian S, Nair S, Bucher B, Schmeckpeper K, Singh S, Levine S, Finn C (2019) Robonet: large-scale multi-robot learning. arXiv preprint arXiv:1910.11215"},{"key":"398_CR32","unstructured":"Dasgupta I, Wang J, Chiappa S, Mitrovic J, Ortega P, Raposo D, Hughes E, Battaglia P, Botvinick M, Kurth-Nelson Z (2019) Causal reasoning from meta-reinforcement learning. arXiv preprint arXiv:1901.08162"},{"key":"398_CR33","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) ImageNet: a large-scale hierarchical image database. In: IEEE conference on computer vision and pattern recognition (CVPR), pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"398_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2021.103799","author":"A Devo","year":"2021","unstructured":"Devo A, Dionigi A, Costante G (2021) Enhancing continuous control of mobile robots for end-to-end visual active tracking. Robot Autonom Syst. https:\/\/doi.org\/10.1016\/j.robot.2021.103799","journal-title":"Robot Autonom Syst"},{"issue":"5","key":"398_CR35","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1177\/0278364909359210","volume":"29","author":"D Dolgov","year":"2010","unstructured":"Dolgov D, Thrun S, Montemerlo M, Diebel J (2010) Path planning for autonomous vehicles in unknown semi-structured environments. Int J Robot Res 29(5):485\u2013501","journal-title":"Int J Robot Res"},{"key":"398_CR36","doi-asserted-by":"crossref","unstructured":"Dosovitskiy A, Fischer P, Ilg E, Hausser P, Hazirbas C, Golkov V, Van Der\u00a0Smagt P, Cremers D, Brox T (2015) FlowNet: learning optical flow with convolutional networks. In: IEEE international conference on computer vision (ICCV), pp 2758\u20132766","DOI":"10.1109\/ICCV.2015.316"},{"key":"398_CR37","unstructured":"Dosovitskiy A, Ros G, Codevilla F, Lopez A, Koltun V (2017) Carla: an open urban driving simulator. In: Conference on robot learning (CoRL), pp 1\u201316"},{"key":"398_CR38","doi-asserted-by":"crossref","unstructured":"Driess D, Oguz O, Ha JS, Toussaint M (2020) Deep visual heuristics: learning feasibility of mixed-integer programs for manipulation planning. In: 2020 IEEE international conference on robotics and automation (ICRA). IEEE, pp 9563\u20139569","DOI":"10.1109\/ICRA40945.2020.9197291"},{"issue":"10","key":"398_CR39","doi-asserted-by":"publisher","first-page":"1517","DOI":"10.1109\/TNN.2011.2160459","volume":"22","author":"R Elwell","year":"2011","unstructured":"Elwell R, Polikar R (2011) Incremental learning of concept drift in nonstationary environments. IEEE Trans Neural Netw 22(10):1517\u20131531. https:\/\/doi.org\/10.1109\/TNN.2011.2160459","journal-title":"IEEE Trans Neural Netw"},{"key":"398_CR40","unstructured":"Fabisch A, Petzoldt C, Otto M, Kirchner F (2019) A survey of behavior learning applications in robotics-state of the art and perspectives. arXiv preprint arXiv:1906.01868"},{"key":"398_CR41","doi-asserted-by":"crossref","unstructured":"Fairbank M, Alonso E (2012) The divergence of reinforcement learning algorithms with value-iteration and function approximation. In: IEEE international joint conference on neural networks (IJCNN), pp 1\u20138","DOI":"10.1109\/IJCNN.2012.6252792"},{"key":"398_CR42","doi-asserted-by":"crossref","unstructured":"Faust A, Oslund K, Ramirez O, Francis A, Tapia L, Fiser M, Davidson J (2018) PRM-RL: long-range robotic navigation tasks by combining reinforcement learning and sampling-based planning. In: IEEE international conference on robotics and automation (ICRA), pp 5113\u20135120","DOI":"10.1109\/ICRA.2018.8461096"},{"key":"398_CR43","unstructured":"Fern\u00e1ndez IMR, Sutanto G, Englert P, Ramachandran RK, Sukhatme GS (2020) Learning manifolds for sequential motion planning. arXiv preprint arXiv:2006.07746"},{"key":"398_CR44","unstructured":"Fortunato M, Azar MG, Piot B, Menick J, Hessel M, Osband I, Graves A, Mnih V, Munos R, Hassabis D et\u00a0al (2018) Noisy networks for exploration. In: International conference on learning representations (ICLR)"},{"key":"398_CR45","doi-asserted-by":"crossref","unstructured":"Fox D (2001) KLD-sampling: adaptive particle filters, pp 713\u2013720","DOI":"10.7551\/mitpress\/1120.003.0096"},{"key":"398_CR46","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: International conference on machine learning. PMLR, pp 1587\u20131596"},{"key":"398_CR47","unstructured":"Gao W, Hsu D, Lee WS, Shen S, Subramanian K (2017) Intention-net: integrating planning and deep learning for goal-directed autonomous navigation. In: Conference on robot learning (CoRL), pp 185\u2013194"},{"issue":"2","key":"398_CR48","doi-asserted-by":"publisher","first-page":"577","DOI":"10.1109\/LRA.2016.2645124","volume":"2","author":"C Garcia Cifuentes","year":"2016","unstructured":"Garcia Cifuentes C, Issac J, W\u00fcthrich M, Schaal S, Bohg J (2016) Probabilistic articulated real-time tracking for robot manipulation. IEEE Robot Autom Lett RA-L 2(2):577\u2013584","journal-title":"IEEE Robot Autom Lett RA-L"},{"key":"398_CR49","doi-asserted-by":"crossref","unstructured":"Garg A, Chiang HTL, Sugaya S, Faust A, Tapia L (2019) Comparison of deep reinforcement learning policies to formal methods for moving obstacle avoidance. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 3534\u20133541","DOI":"10.1109\/IROS40897.2019.8967945"},{"key":"398_CR50","doi-asserted-by":"crossref","unstructured":"Gershman SJ (2017) Reinforcement learning and causal models. The Oxford handbook of causal reasoning, p 295","DOI":"10.1093\/oxfordhb\/9780199399550.013.20"},{"key":"398_CR51","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3101861","author":"J Gonzalez-Trejo","year":"2021","unstructured":"Gonzalez-Trejo J, Mercado-Ravell DA, Becerra I, Murrieta-Cid R (2021) On the visual-based safe landing of UAVS in populated areas: a crucial aspect for urban deployment. IEEE Robot Autom Lett. https:\/\/doi.org\/10.1109\/LRA.2021.3101861","journal-title":"IEEE Robot Autom Lett"},{"key":"398_CR52","volume-title":"Deep learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow I, Bengio Y, Courville A, Bengio Y (2016) Deep learning, vol 1. MIT Press, Cambridge"},{"key":"398_CR53","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Advances in neural information processing systems (NIPS), pp 2672\u20132680"},{"issue":"2","key":"398_CR54","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1002\/rob.20276","volume":"26","author":"R Hadsell","year":"2009","unstructured":"Hadsell R, Sermanet P, Ben J, Erkan A, Scoffier M, Kavukcuoglu K, Muller U, LeCun Y (2009) Learning long-range vision for autonomous off-road driving. J Field Robot 26(2):120\u2013144","journal-title":"J Field Robot"},{"key":"398_CR55","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"398_CR56","doi-asserted-by":"crossref","unstructured":"Heiden E, Millard D, Coumans E, Sukhatme GS (2020) Augmenting differentiable simulators with neural networks to close the Sim2Real gap. arXiv preprint arXiv:2007.06045","DOI":"10.1109\/ICRA48506.2021.9560935"},{"key":"398_CR57","unstructured":"Henaff O (2020) Data-efficient image recognition with contrastive predictive coding. In: International conference on machine learning. PMLR, pp 4182\u20134192"},{"key":"398_CR58","unstructured":"Hernandez-Garcia JF, Sutton RS (2019) Understanding multi-step deep reinforcement learning: a systematic study of the DQN target. arXiv preprint arXiv:1901.07510"},{"key":"398_CR59","doi-asserted-by":"crossref","unstructured":"Hessel M, Modayil J, van Hasselt H, Schaul T, Ostrovski G, Dabney W, Horgan D, Piot B, Azar MG, Silver D (2018) Rainbow: combining improvements in deep reinforcement learning. In: AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"398_CR60","doi-asserted-by":"crossref","unstructured":"Higuera JCG, Meger D, Dudek G (2017) Adapting learned robotics behaviours through policy adjustment. In: IEEE international conference on robotics and automation (ICRA), pp 5837\u20135843","DOI":"10.1109\/ICRA.2017.7989686"},{"key":"398_CR61","doi-asserted-by":"crossref","unstructured":"Hirose N, Sadeghian A, V\u00e1zquez M, Goebel P, Savarese S (2018) Gonet: a semi-supervised deep learning approach for traversability estimation. In: 2018 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 3044\u20133051","DOI":"10.1109\/IROS.2018.8594031"},{"key":"398_CR62","doi-asserted-by":"crossref","unstructured":"Hirose N, Sadeghian A, V\u00e1zquez M, Goebel P, Savarese S (2018) Gonet: a semi-supervised deep learning approach for traversability estimation. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 3044\u20133051","DOI":"10.1109\/IROS.2018.8594031"},{"issue":"2","key":"398_CR63","doi-asserted-by":"publisher","first-page":"2062","DOI":"10.1109\/LRA.2019.2894869","volume":"4","author":"N Hirose","year":"2019","unstructured":"Hirose N, Sadeghian A, Xia F, Mart\u00edn-Mart\u00edn R, Savarese S (2019) VUNet: dynamic scene view synthesis for traversability estimation using an RGB camera. IEEE Robot Autom Lett 4(2):2062\u20132069","journal-title":"IEEE Robot Autom Lett"},{"issue":"4","key":"398_CR64","doi-asserted-by":"publisher","first-page":"3184","DOI":"10.1109\/LRA.2019.2925731","volume":"4","author":"N Hirose","year":"2019","unstructured":"Hirose N, Xia F, Mart\u00edn-Mart\u00edn R, Sadeghian A, Savarese S (2019) Deep visual MPC-policy learning for navigation. IEEE Robot Autom Lett RA-L 4(4):3184\u20133191","journal-title":"IEEE Robot Autom Lett RA-L"},{"key":"398_CR65","unstructured":"Ho SB (2017) Causal learning versus reinforcement learning for knowledge learning and problem solving. In: AAAI workshops"},{"key":"398_CR66","unstructured":"Hoffman J, Tzeng E, Park T, Zhu JY, Isola P, Saenko K, Efros A, Darrell T (2018) Cycada: cycle-consistent adversarial domain adaptation. In: International conference on machine learning (ICML), pp 1989\u20131998"},{"key":"398_CR67","unstructured":"Horgan D, Quan J, Budden D, Barth-Maron G, Hessel M, van Hasselt H, Silver D (2018) Distributed prioritized experience replay. In: International conference on learning representations (ICLR)"},{"key":"398_CR68","unstructured":"Ibarz B, Leike J, Pohlen T, Irving G, Legg S, Amodei D (2018) Reward learning from human preferences and demonstrations in Atari. arXiv preprint arXiv:1811.06521"},{"key":"398_CR69","doi-asserted-by":"crossref","unstructured":"Ichter B, Harrison J, Pavone M (2018) Learning sampling distributions for robot motion planning. In: IEEE international conference on robotics and automation (ICRA), pp 7087\u20137094","DOI":"10.1109\/ICRA.2018.8460730"},{"issue":"3","key":"398_CR70","doi-asserted-by":"publisher","first-page":"2407","DOI":"10.1109\/LRA.2019.2901898","volume":"4","author":"B Ichter","year":"2019","unstructured":"Ichter B, Pavone M (2019) Robot motion planning in learned latent spaces. IEEE Robot Autom Lett 4(3):2407\u20132414","journal-title":"IEEE Robot Autom Lett"},{"key":"398_CR71","doi-asserted-by":"crossref","unstructured":"Ichter B, Schmerling E, Lee TWE, Faust A (2020) Learned critical probabilistic roadmaps for robotic motion planning. In: IEEE international conference on robotics and automation (ICRA), pp 9535\u20139541","DOI":"10.1109\/ICRA40945.2020.9197106"},{"key":"398_CR72","doi-asserted-by":"crossref","unstructured":"Ilg E, Mayer N, Saikia T, Keuper M, Dosovitskiy A, Brox T (2017) Flownet 2.0: evolution of optical flow estimation with deep networks. In: IEEE conference on computer vision and pattern recognition (CVPR), pp 2462\u20132470","DOI":"10.1109\/CVPR.2017.179"},{"key":"398_CR73","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. In: International conference on machine learning. PMLR, pp 448\u2013456"},{"issue":"1","key":"398_CR74","doi-asserted-by":"publisher","first-page":"2","DOI":"10.3390\/technologies9010002","volume":"9","author":"A Jaiswal","year":"2021","unstructured":"Jaiswal A, Babu AR, Zadeh MZ, Banerjee D, Makedon F (2021) A survey on contrastive self-supervised learning. Technologies 9(1):2","journal-title":"Technologies"},{"issue":"2","key":"398_CR75","doi-asserted-by":"publisher","first-page":"3019","DOI":"10.1109\/LRA.2020.2974707","volume":"5","author":"S James","year":"2020","unstructured":"James S, Ma Z, Arrojo DR, Davison AJ (2020) RLBench: the robot learning benchmark & learning environment. IEEE Robot Autom Lett 5(2):3019\u20133026","journal-title":"IEEE Robot Autom Lett"},{"key":"398_CR76","doi-asserted-by":"crossref","unstructured":"Jing L, Tian Y (2020) Self-supervised visual feature learning with deep neural networks: a survey. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2020.2992393"},{"key":"398_CR77","unstructured":"Julian R, Swanson B, Sukhatme GS, Levine S, Finn C, Hausman K (2020) Efficient adaptation for end-to-end vision-based robotic manipulation. arXiv preprint arXiv:2004.10190"},{"key":"398_CR78","doi-asserted-by":"crossref","unstructured":"Kahn G, Abbeel P, Levine S (2020) Badgr: an autonomous self-supervised learning-based navigation system. arXiv preprint arXiv:2002.05700","DOI":"10.1109\/LRA.2021.3057023"},{"key":"398_CR79","unstructured":"Kalashnikov D, Irpan A, Pastor P, Ibarz J, Herzog A, Jang E, Quillen D, Holly E, Kalakrishnan M, Vanhoucke V et\u00a0al (2018) Scalable deep reinforcement learning for vision-based robotic manipulation. In: Conference on robot learning (CoRL), pp 651\u2013673"},{"key":"398_CR80","unstructured":"Kapturowski S, Ostrovski G, Quan J, Munos R, Dabney W (2018) Recurrent experience replay in distributed reinforcement learning. In: International conference on learning representations (ICLR)"},{"key":"398_CR81","unstructured":"Karkus P, Hsu D, Lee WS (2017) Qmdp-net: deep learning for planning under partial observability. In: Advances in neural information processing systems (NIPS), pp 4697\u20134707"},{"key":"398_CR82","doi-asserted-by":"crossref","unstructured":"Karkus P, Ma X, Hsu D, Kaelbling LP, Lee WS, Lozano-P\u00e9rez T (2019) Differentiable algorithm networks for composable robot learning. arXiv preprint arXiv:1905.11602","DOI":"10.15607\/RSS.2019.XV.039"},{"key":"398_CR83","doi-asserted-by":"crossref","unstructured":"K\u00e1roly AI, Galambos P, Kuti J, Rudas IJ (2020) Deep learning in robotics: survey on model structures and training strategies. IEEE Trans Syst Man Cybern Syst","DOI":"10.1109\/TSMC.2020.3018325"},{"key":"398_CR84","doi-asserted-by":"crossref","unstructured":"Kaufmann E, Loquercio A, Ranftl R, M\u00fcller M, Koltun V, Scaramuzza D (2020) Deep drone acrobatics. arXiv preprint arXiv:2006.05768","DOI":"10.15607\/RSS.2020.XVI.040"},{"key":"398_CR85","doi-asserted-by":"publisher","first-page":"151","DOI":"10.3389\/frobt.2019.00151","volume":"6","author":"R Kaushik","year":"2020","unstructured":"Kaushik R, Desreumaux P, Mouret JB (2020) Adaptive prior selection for repertoire-based online adaptation in robotics. Front Robot AI 6:151. https:\/\/doi.org\/10.3389\/frobt.2019.00151","journal-title":"Front Robot AI"},{"key":"398_CR86","doi-asserted-by":"crossref","unstructured":"Kirtas M, Tsampazis K, Passalis N, Tefas A (2020) Deepbots: a webots-based deep reinforcement learning framework for robotics. In: IFIP international conference on artificial intelligence applications and innovations. Springer, pp 64\u201375","DOI":"10.1007\/978-3-030-49186-4_6"},{"issue":"11","key":"398_CR87","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober J, Bagnell JA, Peters J (2013) Reinforcement learning in robotics: a survey. Int J Robot Res 32(11):1238\u20131274","journal-title":"Int J Robot Res"},{"key":"398_CR88","doi-asserted-by":"crossref","unstructured":"Kong J, Pfeiffer M, Schildbach G, Borrelli F (2015) Kinematic and dynamic vehicle models for autonomous driving control design. In: IEEE Intelligent vehicles symposium (IV), pp 1094\u20131099","DOI":"10.1109\/IVS.2015.7225830"},{"key":"398_CR89","unstructured":"Kostrikov I, Yarats D, Fergus R (2020) Image augmentation is all you need: regularizing deep reinforcement learning from pixels. arXiv preprint arXiv:2004.13649"},{"issue":"6","key":"398_CR90","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) ImageNet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390","journal-title":"Commun ACM"},{"key":"398_CR91","doi-asserted-by":"crossref","unstructured":"Kumar R, Mandalika A, Choudhury S, Srinivasa S (2019) Lego: leveraging experience in roadmap generation for sampling-based planning. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 1488\u20131495","DOI":"10.1109\/IROS40897.2019.8968503"},{"key":"398_CR92","doi-asserted-by":"crossref","unstructured":"Kuutti S, Bowden R, Jin Y, Barber P, Fallah S (2020) A survey of deep learning applications to autonomous vehicle control. IEEE Trans Intell Transp Syst","DOI":"10.1109\/TITS.2019.2962338"},{"key":"398_CR93","doi-asserted-by":"crossref","unstructured":"Lamb L, Garcez A, Gori M, Prates M, Avelar P, Vardi M (2020) Graph neural networks meet neural-symbolic computing: A survey and perspective. arXiv preprint arXiv:2003.00330","DOI":"10.24963\/ijcai.2020\/679"},{"key":"398_CR94","doi-asserted-by":"crossref","unstructured":"Lecarpentier E, Abel D, Asadi K, Jinnai Y, Rachelson E, Littman ML (2020) Lipschitz lifelong reinforcement learning. arXiv preprint arXiv:2001.05411","DOI":"10.1609\/aaai.v35i9.17006"},{"issue":"7553","key":"398_CR95","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. Nature 521(7553):436\u2013444","journal-title":"Nature"},{"key":"398_CR96","unstructured":"LeCun Y, Muller U, Ben J, Cosatto E, Flepp B (2006) Off-road obstacle avoidance through end-to-end learning. In: Advances in neural information processing systems (NIPS), pp 739\u2013746"},{"key":"398_CR97","unstructured":"Lee JH, Han MK, Ko DW, Suh IH (2019) From big to small: multi-scale local planar guidance for monocular depth estimation. arXiv preprint arXiv:1907.10326"},{"key":"398_CR98","unstructured":"Lee K, Smith L, Abbeel P (2021) Pebble: feedback-efficient interactive reinforcement learning via relabeling experience and unsupervised pre-training. arXiv preprint arXiv:2106.05091"},{"key":"398_CR99","doi-asserted-by":"crossref","unstructured":"Lee MA, Zhu Y, Srinivasan K, Shah P, Savarese S, Fei-Fei L, Garg A, Bohg J (2019) Making sense of vision and touch: self-supervised learning of multimodal representations for contact-rich tasks. In: IEEE international conference on robotics and automation (ICRA), pp 8943\u20138950","DOI":"10.1109\/ICRA.2019.8793485"},{"key":"398_CR100","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2016) Continuous control with deep reinforcement learning. In: International conference on learning representations (ICLR)\u2014poster"},{"key":"398_CR101","doi-asserted-by":"crossref","unstructured":"Lippi M, Poklukar P, Welle MC, Varava A, Yin H, Marino A, Kragic D (2020) Latent space roadmap for visual action planning of deformable and rigid object manipulation. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS)","DOI":"10.1109\/IROS45743.2020.9340764"},{"issue":"10","key":"398_CR102","doi-asserted-by":"publisher","first-page":"2024","DOI":"10.1109\/TPAMI.2015.2505283","volume":"38","author":"F Liu","year":"2015","unstructured":"Liu F, Shen C, Lin G, Reid I (2015) Learning depth from single monocular images using deep convolutional neural fields. IEEE Trans Pattern Anal Mach Intell 38(10):2024\u20132039","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"398_CR103","unstructured":"Liu H, Abbeel P (2021) Behavior from the void: Unsupervised active pre-training"},{"key":"398_CR104","doi-asserted-by":"crossref","unstructured":"Liu K, Stadler M, Roy N (2020) Learned sampling distributions for efficient planning in hybrid geometric and object-level representations. In: IEEE international conference on robotics and automation (ICRA), pp 9555\u20139562","DOI":"10.1109\/ICRA40945.2020.9196771"},{"issue":"2","key":"398_CR105","doi-asserted-by":"publisher","first-page":"1088","DOI":"10.1109\/LRA.2018.2795643","volume":"3","author":"A Loquercio","year":"2018","unstructured":"Loquercio A, Maqueda AI, Del-Blanco CR, Scaramuzza D (2018) DroNet: learning to fly by driving. IEEE Robot Autom Lett 3(2):1088\u20131095","journal-title":"IEEE Robot Autom Lett"},{"key":"398_CR106","unstructured":"Lowe R, Wu Y, Tamar A, Harb J, Abbeel P, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. arXiv preprint arXiv:1706.02275"},{"key":"398_CR107","unstructured":"Luo W, Sun P, Zhong F, Liu W, Zhang T, Wang Y (2018) End-to-end active object tracking via reinforcement learning. In: International conference on machine learning, pp 3286\u20133295"},{"issue":"6","key":"398_CR108","doi-asserted-by":"publisher","first-page":"1317","DOI":"10.1109\/TPAMI.2019.2899570","volume":"42","author":"W Luo","year":"2019","unstructured":"Luo W, Sun P, Zhong F, Liu W, Zhang T, Wang Y (2019) End-to-end active object tracking and its real-world deployment via reinforcement learning. IEEE Trans Pattern Anal Mach Intell 42(6):1317\u20131332","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"398_CR109","doi-asserted-by":"crossref","unstructured":"Madumal P, Miller T, Sonenberg L, Vetere F (2020) Explainable reinforcement learning through a causal lens. In: Proceedings of the AAAI conference on artificial intelligence, vol 34, pp 2493\u20132500","DOI":"10.1609\/aaai.v34i03.5631"},{"key":"398_CR110","unstructured":"Mao J, Gan C, Kohli P, Tenenbaum JB, Wu J (2019) The neuro-symbolic concept learner: interpreting scenes, words, and sentences from natural supervision. arXiv preprint arXiv:1904.12584"},{"key":"398_CR111","doi-asserted-by":"crossref","unstructured":"McCarty SL, Burke LM, McGuire M (2018) Parallel monotonic basin hopping for low thrust trajectory optimization. In: AAS\/AIAA space flight mechanics meeting, p 1452","DOI":"10.2514\/6.2018-1452"},{"key":"398_CR112","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1016\/j.patrec.2020.02.024","volume":"133","author":"M Mendoza","year":"2020","unstructured":"Mendoza M, Vasquez-Gomez JI, Taud H, Sucar LE, Reta C (2020) Supervised learning of the next-best-view for 3D object reconstruction. Pattern Recognit Lett 133:224\u2013231","journal-title":"Pattern Recognit Lett"},{"key":"398_CR113","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2021.3069132","author":"WX Merkt","year":"2021","unstructured":"Merkt WX, Ivan V, Dinev T, Havoutis I, Vijayakumar S (2021) Memory clustering using persistent homology for multimodality- and discontinuity-sensitive learning of optimal control warm-starts. IEEE Trans Robot. https:\/\/doi.org\/10.1109\/TRO.2021.3069132","journal-title":"IEEE Trans Robot"},{"key":"398_CR114","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning (ICML), pp 1928\u20131937"},{"issue":"7540","key":"398_CR115","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"key":"398_CR116","doi-asserted-by":"crossref","unstructured":"Molchanov A, Chen T, H\u00f6nig W, Preiss JA, Ayanian N, Sukhatme GS (2019) Sim-to-(multi)-real: transfer of low-level robust control policies to multiple quadrotors. arXiv preprint arXiv:1903.04628","DOI":"10.1109\/IROS40897.2019.8967695"},{"key":"398_CR117","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2021.3057802","author":"AS Morgan","year":"2021","unstructured":"Morgan AS, Bircher WG, Dollar AM (2021) Towards generalized manipulation learning through grasp mechanics-based features and self-supervision. IEEE Trans Robot. https:\/\/doi.org\/10.1109\/TRO.2021.3057802","journal-title":"IEEE Trans Robot"},{"key":"398_CR118","unstructured":"Nagabandi A, Finn C, Levine S (2019) Deep online learning via meta-learning: continual adaptation for model-based RL"},{"key":"398_CR119","unstructured":"Nagabandi A, Konolige K, Levine S, Kumar V (2020) Deep dynamics models for learning dexterous manipulation. In: Conference on robot learning (CoRL), pp 1101\u20131112"},{"key":"398_CR120","doi-asserted-by":"crossref","unstructured":"Nagami K, Schwager M (2021) Hjb-rl: Initializing reinforcement learning with optimal control policies applied to autonomous drone racing. In: Robotics: science and systems, pp 1\u20139","DOI":"10.15607\/RSS.2021.XVII.062"},{"key":"398_CR121","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1016\/j.artint.2015.09.013","volume":"247","author":"TT Nguyen","year":"2017","unstructured":"Nguyen TT, Silander T, Li Z, Leong TY (2017) Scalable transfer learning in heterogeneous, dynamic environments. Artif Intell 247:70\u201394. https:\/\/doi.org\/10.1016\/j.artint.2015.09.013","journal-title":"Artif Intell"},{"key":"398_CR122","unstructured":"Oord A, Dieleman S, Zen H, Simonyan K, Vinyals O, Graves A, Kalchbrenner N, Senior A, Kavukcuoglu K (2016) Wavenet: a generative model for raw audio. arXiv preprint arXiv:1609.03499"},{"key":"398_CR123","doi-asserted-by":"crossref","unstructured":"Owens A, Efros AA (2018) Audio-visual scene analysis with self-supervised multisensory features. In: European conference on computer vision (ECCV), pp 631\u2013648","DOI":"10.1007\/978-3-030-01231-1_39"},{"key":"398_CR124","doi-asserted-by":"crossref","unstructured":"Pan X, Seita D, Gao Y, Canny J (2019) Risk averse robust adversarial reinforcement learning. In: IEEE international conference on robotics and automation (ICRA), pp 8522\u20138528","DOI":"10.1109\/ICRA.2019.8794293"},{"issue":"3","key":"398_CR125","doi-asserted-by":"publisher","first-page":"1544","DOI":"10.1109\/LRA.2018.2801475","volume":"3","author":"D Park","year":"2018","unstructured":"Park D, Hoshi Y, Kemp CC (2018) A multimodal anomaly detector for robot-assisted feeding using an LSTM-based variational autoencoder. IEEE Robot Autom Lett RA-L 3(3):1544\u20131551","journal-title":"IEEE Robot Autom Lett RA-L"},{"key":"398_CR126","doi-asserted-by":"crossref","unstructured":"Pfeiffer M, Schaeuble M, Nieto J, Siegwart R, Cadena C (2017) From perception to decision: a data-driven approach to end-to-end motion planning for autonomous ground robots. In: IEEE international conference on robotics and automation (ICRA), pp 1527\u20131533","DOI":"10.1109\/ICRA.2017.7989182"},{"key":"398_CR127","volume-title":"Markov decision processes: discrete stochastic dynamic programming","author":"ML Puterman","year":"2014","unstructured":"Puterman ML (2014) Markov decision processes: discrete stochastic dynamic programming. Wiley, New York"},{"key":"398_CR128","doi-asserted-by":"crossref","unstructured":"Qureshi AH, Miao Y, Simeonov A, Yip MC (2021) Motion planning networks: bridging the gap between learning-based and classical motion planners. IEEE Trans Robot","DOI":"10.1109\/TRO.2020.3006716"},{"key":"398_CR129","doi-asserted-by":"crossref","unstructured":"Qureshi AH, Simeonov A, Bency MJ, Yip MC (2019) Motion planning networks. In: IEEE international conference on robotics and automation (ICRA), pp 2118\u20132124","DOI":"10.1109\/ICRA.2019.8793889"},{"key":"398_CR130","doi-asserted-by":"crossref","unstructured":"Qureshi AH, Yip MC (2018) Deeply informed neural sampling for robot motion planning. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 6582\u20136588","DOI":"10.1109\/IROS.2018.8593772"},{"issue":"4","key":"398_CR131","doi-asserted-by":"publisher","first-page":"4407","DOI":"10.1109\/LRA.2018.2869640","volume":"3","author":"N Radwan","year":"2018","unstructured":"Radwan N, Valada A, Burgard W (2018) Vlocnet++: deep multitask learning for semantic visual localization and odometry. IEEE Robot Autom Lett 3(4):4407\u20134414","journal-title":"IEEE Robot Autom Lett"},{"key":"398_CR132","doi-asserted-by":"crossref","unstructured":"Ranftl R, Koltun V (2018) Deep fundamental matrix estimation. In: European conference on computer vision (ECCV), pp 284\u2013299","DOI":"10.1007\/978-3-030-01246-5_18"},{"key":"398_CR133","unstructured":"Reddy DSK, Saha A, Tamilselvam SG, Agrawal P, Dayama P (2019) Risk averse reinforcement learning for mixed multi-agent environments. In: Proceedings of the 18th international conference on autonomous agents and multiagent systems, pp 2171\u20132173"},{"key":"398_CR134","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2021.103757","author":"EG Ribeiro","year":"2021","unstructured":"Ribeiro EG, de Queiroz Mendes R, Grassi V (2021) Real-time deep learning approach to visual servo control and grasp detection for autonomous robotic manipulation. Robot Auton Syst. https:\/\/doi.org\/10.1016\/j.robot.2021.103757","journal-title":"Robot Auton Syst"},{"issue":"1\u20132","key":"398_CR135","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/s10994-006-5833-1","volume":"62","author":"M Richardson","year":"2006","unstructured":"Richardson M, Domingos P (2006) Markov logic networks. Mach Learn 62(1\u20132):107\u2013136","journal-title":"Mach Learn"},{"key":"398_CR136","unstructured":"Riegel R, Gray A, Luus F, Khan N, Makondo N, Akhalwaya IY, Qian H, Fagin R, Barahona F, Sharma U et\u00a0al (2020) Logical neural networks. arXiv preprint arXiv:2006.13155"},{"key":"398_CR137","doi-asserted-by":"crossref","unstructured":"Rifai S, Vincent P, Muller X, Glorot X, Bengio Y (2011) Contractive auto-encoders: explicit invariance during feature extraction. In: International conference on machine learning (ICML)","DOI":"10.1007\/978-3-642-23783-6_41"},{"key":"398_CR138","unstructured":"Ross S, Gordon G, Bagnell D (2011) A reduction of imitation learning and structured prediction to no-regret online learning. In: International conference on artificial intelligence and statistics (AISTATS), pp 627\u2013635"},{"key":"398_CR139","volume-title":"The cross-entropy method: a unified approach to combinatorial optimization, Monte-Carlo simulation and machine learning","author":"RY Rubinstein","year":"2013","unstructured":"Rubinstein RY, Kroese DP (2013) The cross-entropy method: a unified approach to combinatorial optimization, Monte-Carlo simulation and machine learning. Springer, New York"},{"issue":"11","key":"398_CR140","doi-asserted-by":"publisher","first-page":"1199","DOI":"10.1007\/s11263-018-1089-z","volume":"126","author":"M Ruder","year":"2018","unstructured":"Ruder M, Dosovitskiy A, Brox T (2018) Artistic style transfer for videos and spherical images. Int J Comput Vis 126(11):1199\u20131219","journal-title":"Int J Comput Vis"},{"key":"398_CR141","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2021.3084374","author":"N Rudin","year":"2021","unstructured":"Rudin N, Kolvenbach H, Tsounis V, Hutter M (2021) Cat-like jumping and landing of legged robots in low gravity using deep reinforcement learning. IEEE Trans Robot. https:\/\/doi.org\/10.1109\/TRO.2021.3084374","journal-title":"IEEE Trans Robot"},{"key":"398_CR142","unstructured":"Schaul T, Horgan D, Gregor K, Silver D (2015) Universal value function approximators. In: International conference on machine learning (ICML), pp 1312\u20131320"},{"key":"398_CR143","unstructured":"Schaul T, Quan J, Antonoglou I, Silver D (2015) Prioritized experience replay. arXiv preprint arXiv:1511.05952"},{"key":"398_CR144","unstructured":"Schulman J, Levine S, Abbeel P, Jordan M, Moritz P (2015) Trust region policy optimization. In: International conference on machine learning (ICML), pp 1889\u20131897"},{"key":"398_CR145","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347"},{"key":"398_CR146","unstructured":"Schwarzer M, Anand A, Goel R, Hjelm RD, Courville A, Bachman P (2020) Data-efficient reinforcement learning with self-predictive representations. arXiv preprint arXiv:2007.05929"},{"key":"398_CR147","unstructured":"Seo Y, Chen L, Shin J, Lee H, Abbeel P, Lee K (2021) State entropy maximization with random encoders for efficient exploration. arXiv preprint arXiv:2102.09430"},{"key":"398_CR148","unstructured":"Serafini L, Garcez Ad (2016) Logic tensor networks: deep learning and logical reasoning from data and knowledge. arXiv preprint arXiv:1606.04422"},{"key":"398_CR149","doi-asserted-by":"crossref","unstructured":"Shi W, Song S, Wu C (2019) Soft policy gradient method for maximum entropy deep reinforcement learning. In: International joint conference on artificial intelligence (IJCAI), pp 3425\u20133431","DOI":"10.24963\/ijcai.2019\/475"},{"key":"398_CR150","unstructured":"Silver D, Lever G, Heess N, Degris T, Wierstra D, Riedmiller M (2014) Deterministic policy gradient algorithms. In: International conference on machine learning (ICML)"},{"key":"398_CR151","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: International conference on learning representations (ICLR)"},{"key":"398_CR152","unstructured":"Singh R, Zhang Q, Chen Y (2020) Improving robustness via risk averse distributional reinforcement learning. In: Learning for dynamics and control. PMLR, pp 958\u2013968"},{"issue":"3","key":"398_CR153","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1023\/A:1007678930559","volume":"38","author":"S Singh","year":"2000","unstructured":"Singh S, Jaakkola T, Littman ML, Szepesv\u00e1ri C (2000) Convergence results for single-step on-policy reinforcement-learning algorithms. Mach Learn 38(3):287\u2013308","journal-title":"Mach Learn"},{"key":"398_CR154","doi-asserted-by":"crossref","unstructured":"Smolyanskiy N, Kamenev A, Smith J, Birchfield S (2017) Toward low-flying autonomous mav trail navigation using deep neural networks for environmental awareness. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 4241\u20134247","DOI":"10.1109\/IROS.2017.8206285"},{"key":"398_CR155","first-page":"3483","volume":"28","author":"K Sohn","year":"2015","unstructured":"Sohn K, Lee H, Yan X (2015) Learning structured output representation using deep conditional generative models. Adv Neural Inf Process Syst NIPS 28:3483\u20133491","journal-title":"Adv Neural Inf Process Syst NIPS"},{"key":"398_CR156","unstructured":"Srinivas A, Laskin M, Abbeel P (2020) Curl: contrastive unsupervised representations for reinforcement learning. arXiv preprint arXiv:2004.04136"},{"key":"398_CR157","unstructured":"Sukhbaatar S, Lin Z, Kostrikov I, Synnaeve G, Szlam A, Fergus R (2018) Intrinsic motivation and automatic curricula via asymmetric self-play. In: International conference on learning representations (ICLR)"},{"key":"398_CR158","doi-asserted-by":"publisher","first-page":"012035","DOI":"10.1088\/1742-6596\/1746\/1\/012035","volume":"1746","author":"T Sun","year":"2021","unstructured":"Sun T, Gong L, Li X, Xie S, Chen Z, Hu Q, Filliat D (2021) Robotdrlsim: a real time robot simulation platform for reinforcement learning and human interactive demonstration learning. J Phys Conf Ser 1746:012035","journal-title":"J Phys Conf Ser"},{"key":"398_CR159","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1016\/j.robot.2021.103757","volume":"45","author":"Z Sun","year":"2021","unstructured":"Sun Z, Li F, Duan X, Jin L, Lian Y, Liu S, Liu K (2021) Deep reinforcement learning for quadrotor path following with adaptive velocity. Auton Robots 45:119\u2013134. https:\/\/doi.org\/10.1016\/j.robot.2021.103757","journal-title":"Auton Robots"},{"key":"398_CR160","doi-asserted-by":"publisher","first-page":"595","DOI":"10.1016\/j.robot.2021.103757","volume":"45","author":"Z Sun","year":"2021","unstructured":"Sun Z, Li F, Duan X, Jin L, Lian Y, Liu S, Liu K (2021) A novel adaptive iterative learning control approach and human-in-the-loop control pattern for lower limb rehabilitation robot in disturbances environment. Auton Robots 45:595\u2013610. https:\/\/doi.org\/10.1016\/j.robot.2021.103757","journal-title":"Auton Robots"},{"issue":"4\u20135","key":"398_CR161","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1177\/0278364918770733","volume":"37","author":"N S\u00fcnderhauf","year":"2018","unstructured":"S\u00fcnderhauf N, Brock O, Scheirer W, Hadsell R, Fox D, Leitner J, Upcroft B, Abbeel P, Burgard W, Milford M et al (2018) The limits and potentials of deep learning for robotics. Int J Robot Res 37(4\u20135):405\u2013420","journal-title":"Int J Robot Res"},{"key":"398_CR162","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT Press, Cambridge"},{"key":"398_CR163","doi-asserted-by":"crossref","unstructured":"Tai L, Paolo G, Liu M (2017) Virtual-to-real deep reinforcement learning: continuous control of mobile robots for mapless navigation. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 31\u201336","DOI":"10.1109\/IROS.2017.8202134"},{"key":"398_CR164","doi-asserted-by":"crossref","unstructured":"Tang G, Hauser K (2019) Discontinuity-sensitive optimal control learning by mixture of experts. In: 2019 international conference on robotics and automation (ICRA). IEEE, pp 7892\u20137898","DOI":"10.1109\/ICRA.2019.8793909"},{"key":"398_CR165","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1016\/j.eswa.2017.09.023","volume":"92","author":"AC Tenorio-Gonz\u00e1lez","year":"2018","unstructured":"Tenorio-Gonz\u00e1lez AC, Morales EF (2018) Automatic discovery of concepts and actions. Expert Syst Appl 92:192\u2013205","journal-title":"Expert Syst Appl"},{"key":"398_CR166","doi-asserted-by":"crossref","unstructured":"Tenorio-Gonzalez AC, Morales EF, Villasenor-Pineda L (2010) Dynamic reward shaping: training a robot by voice. In: Ibero-American conference on artificial intelligence. Springer, pp 483\u2013492","DOI":"10.1007\/978-3-642-16952-6_49"},{"key":"398_CR167","doi-asserted-by":"crossref","unstructured":"Terasawa R, Ariki Y, Narihira T, Tsuboi T, Nagasaka K (2020) 3D-CNN based heuristic guided task-space planner for faster motion planning. In: IEEE international conference on robotics and automation (ICRA), pp 9548\u20139554","DOI":"10.1109\/ICRA40945.2020.9196883"},{"key":"398_CR168","doi-asserted-by":"crossref","unstructured":"Tesauro G (1992) Practical issues in temporal difference learning. In: Advances in neural information processing systems (NIPS), pp 259\u2013266","DOI":"10.1007\/978-1-4615-3618-5_3"},{"key":"398_CR169","unstructured":"Thrun S, Schwartz A (1993) Issues in using function approximation for reinforcement learning. In: Connectionist models summer school"},{"key":"398_CR170","unstructured":"To T, Tremblay J, McKay D, Yamaguchi Y, Leung K, Balanon A, Cheng J, Birchfield S (2018) Ndds: Nvidia deep learning dataset synthesizer. https:\/\/github.com\/NVIDIA\/Dataset_Synthesizer"},{"key":"398_CR171","doi-asserted-by":"crossref","unstructured":"Tobin J, Fong R, Ray A, Schneider J, Zaremba W, Abbeel P (2017) Domain randomization for transferring deep neural networks from simulation to the real world. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 23\u201330","DOI":"10.1109\/IROS.2017.8202133"},{"key":"398_CR172","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems. IEEE, pp 5026\u20135033","DOI":"10.1109\/IROS.2012.6386109"},{"key":"398_CR173","doi-asserted-by":"crossref","unstructured":"Tremblay J, To T, Birchfield S (2018) Falling things: a synthetic dataset for 3D object detection and pose estimation. In: IEEE conference on computer vision and pattern recognition (CVPR) workshops, pp 2038\u20132041","DOI":"10.1109\/CVPRW.2018.00275"},{"key":"398_CR174","unstructured":"Tremblay J, To T, Sundaralingam B, Xiang Y, Fox D, Birchfield S (2018) Deep object pose estimation for semantic robotic grasping of household objects. arXiv preprint arXiv:1809.10790"},{"key":"398_CR175","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-021-01412-3","author":"H Ugurlu","year":"2021","unstructured":"Ugurlu H, Kalkan S, Saranli A (2021) Reinforcement learning versus conventional control for controlling a planar bi-rotor platform with tail appendage. J Intell Robot Syst. https:\/\/doi.org\/10.1007\/s10846-021-01412-3","journal-title":"J Intell Robot Syst"},{"key":"398_CR176","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt H, Guez A, Silver D (2015) Deep reinforcement learning with double q-learning. arXiv preprint arXiv:1509.06461","DOI":"10.1609\/aaai.v30i1.10295"},{"issue":"42","key":"398_CR177","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00138-020-01166-2","volume":"32","author":"JI Vasquez-Gomez","year":"2021","unstructured":"Vasquez-Gomez JI, Troncoso D, Becerra I, Sucar E, Murrieta-Cid R (2021) Next-best-view regression using a 3D convolutional neural network. Mach Vis Appl 32(42):1\u201314. https:\/\/doi.org\/10.1007\/s00138-020-01166-2","journal-title":"Mach Vis Appl"},{"issue":"5","key":"398_CR178","first-page":"1","volume":"53","author":"H Wang","year":"2020","unstructured":"Wang H, Yeung DY (2020) A survey on Bayesian deep learning. ACM Comput Surv CSUR 53(5):1\u201337","journal-title":"ACM Comput Surv CSUR"},{"issue":"2","key":"398_CR179","doi-asserted-by":"publisher","first-page":"621","DOI":"10.1109\/TMECH.2019.2899365","volume":"24","author":"Z Wang","year":"2019","unstructured":"Wang Z, Chen C, Li HX, Dong D, Tarn TJ (2019) Incremental reinforcement learning with prioritized sweeping for dynamic environments. IEEE\/ASME Trans Mechatron 24(2):621\u2013632","journal-title":"IEEE\/ASME Trans Mechatron"},{"issue":"6\u20137","key":"398_CR180","doi-asserted-by":"publisher","first-page":"866","DOI":"10.1177\/02783649211004615","volume":"40","author":"Z Wang","year":"2021","unstructured":"Wang Z, Reed Garrett C, Pack Kaelbling L, Lozano-P\u00e9rez T (2021) Learning compositional models of robot skills for task and motion planning. Int J Robot Res 40(6\u20137):866\u2013894. https:\/\/doi.org\/10.1177\/02783649211004615","journal-title":"Int J Robot Res"},{"key":"398_CR181","unstructured":"Wang Z, Schaul T, Hessel M, Hasselt H, Lanctot M, Freitas N (2016) Dueling network architectures for deep reinforcement learning. In: International conference on machine learning (ICML), pp 1995\u20132003"},{"issue":"3\u20134","key":"398_CR182","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJ Watkins","year":"1992","unstructured":"Watkins CJ, Dayan P (1992) Q-learning. Mach Learn 8(3\u20134):279\u2013292","journal-title":"Mach Learn"},{"issue":"2","key":"398_CR183","doi-asserted-by":"publisher","first-page":"1509","DOI":"10.1109\/LRA.2019.2895390","volume":"4","author":"L Wellhausen","year":"2019","unstructured":"Wellhausen L, Dosovitskiy A, Ranftl R, Walas K, Cadena C, Hutter M (2019) Where should i walk? Predicting terrain properties from images via self-supervised learning. IEEE Robot Autom Lett 4(2):1509\u20131516","journal-title":"IEEE Robot Autom Lett"},{"issue":"3\u20134","key":"398_CR184","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/BF00992696","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams RJ (1992) Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach Learn 8(3\u20134):229\u2013256","journal-title":"Mach Learn"},{"issue":"4","key":"398_CR185","doi-asserted-by":"publisher","first-page":"3113","DOI":"10.1109\/LRA.2019.2924125","volume":"4","author":"C Wu","year":"2019","unstructured":"Wu C, Zeng R, Pan J, Wang CC, Liu YJ (2019) Plant phenotyping by deep-learning-based planner for multi-robots. IEEE Robot Autom Lett 4(4):3113\u20133120","journal-title":"IEEE Robot Autom Lett"},{"key":"398_CR186","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1016\/j.neucom.2020.01.085","volume":"396","author":"X Wu","year":"2020","unstructured":"Wu X, Sahoo D, Hoi SC (2020) Recent advances in deep learning for object detection. Neurocomputing 396:39\u201364","journal-title":"Neurocomputing"},{"key":"398_CR187","doi-asserted-by":"crossref","unstructured":"Xiang Y, Schmidt T, Narayanan V, Fox D (2017) PoseCNN: a convolutional neural network for 6D object pose estimation in cluttered scenes. arXiv preprint arXiv:1711.00199","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"398_CR188","doi-asserted-by":"crossref","unstructured":"Xu H, Gao Y, Yu F, Darrell T (2017) End-to-end learning of driving models from large-scale video datasets. In: IEEE conference on computer vision and pattern recognition (CVPR), pp 2174\u20132182","DOI":"10.1109\/CVPR.2017.376"},{"key":"398_CR189","doi-asserted-by":"publisher","DOI":"10.1007\/s10846-021-01439-6","author":"C Yang","year":"2021","unstructured":"Yang C, Liu Y, Zell A (2021) Relative camera pose estimation using synthetic data with domain adaptation via cycle-consistent adversarial networks. J Intell Robot Syst. https:\/\/doi.org\/10.1007\/s10846-021-01439-6","journal-title":"J Intell Robot Syst"},{"key":"398_CR190","unstructured":"Yarats D, Fergus R, Lazaric A, Pinto L (2021) Reinforcement learning with prototypical representations"},{"key":"398_CR191","doi-asserted-by":"crossref","unstructured":"Zhang C, Huh J, Lee DD (2018) Learning implicit sampling distributions for motion planning. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 3654\u20133661","DOI":"10.1109\/IROS.2018.8594028"},{"key":"398_CR192","unstructured":"Zhang J, Cheung B, Finn C, Levine S, Jayaraman D (2020) Cautious adaptation for reinforcement learning in safety-critical settings. In: International conference on machine learning. PMLR, pp 11055\u201311065"},{"issue":"2","key":"398_CR193","doi-asserted-by":"publisher","first-page":"1148","DOI":"10.1109\/LRA.2019.2894216","volume":"4","author":"J Zhang","year":"2019","unstructured":"Zhang J, Tai L, Yun P, Xiong Y, Liu M, Boedecker J, Burgard W (2019) VR-goggles for robots: real-to-sim domain adaptation for visual control. IEEE Robot Autom Lett RA-L 4(2):1148\u20131155","journal-title":"IEEE Robot Autom Lett RA-L"},{"key":"398_CR194","unstructured":"Zhang S, Liu B, Whiteson S (2020) Per-step reward: a new perspective for risk-averse reinforcement learning. arXiv preprint arXiv:2004.10888"},{"key":"398_CR195","doi-asserted-by":"crossref","unstructured":"Zhou T, Tulsiani S, Sun W, Malik J, Efros AA (2016) View synthesis by appearance flow. In: European conference on computer vision (ECCV), pp 286\u2013301","DOI":"10.1007\/978-3-319-46493-0_18"},{"key":"398_CR196","doi-asserted-by":"crossref","unstructured":"Zhu JY, Park T, Isola P, Efros AA (2017) Unpaired image-to-image translation using cycle-consistent adversarial networks. In: IEEE international conference on computer vision (ICCV), pp 2223\u20132232","DOI":"10.1109\/ICCV.2017.244"},{"key":"398_CR197","unstructured":"Zhu Z, Lin K, Zhou J (2020) Transfer learning in deep reinforcement learning: a survey. arXiv preprint arXiv:2009.07888"},{"key":"398_CR198","unstructured":"Zhu Z, Lin K, Zhou J (2020) Transfer learning in deep reinforcement learning: a survey. arXiv preprint arXiv:2009.07888"}],"container-title":["Intelligent Service Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11370-021-00398-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11370-021-00398-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11370-021-00398-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T18:51:19Z","timestamp":1699815079000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11370-021-00398-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,11]]},"references-count":198,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,11]]}},"alternative-id":["398"],"URL":"https:\/\/doi.org\/10.1007\/s11370-021-00398-z","relation":{},"ISSN":["1861-2776","1861-2784"],"issn-type":[{"value":"1861-2776","type":"print"},{"value":"1861-2784","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,11]]},"assertion":[{"value":"30 August 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 October 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 November 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or nonfinancial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}