{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:37:51Z","timestamp":1765546671628,"version":"3.40.3"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030625788"},{"type":"electronic","value":"9783030625795"}],"license":[{"start":{"date-parts":[[2020,11,3]],"date-time":"2020-11-03T00:00:00Z","timestamp":1604361600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,11,3]],"date-time":"2020-11-03T00:00:00Z","timestamp":1604361600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-62579-5_22","type":"book-chapter","created":{"date-parts":[[2020,11,2]],"date-time":"2020-11-02T06:02:40Z","timestamp":1604296960000},"page":"318-331","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Reinforcement Learning Experiments and Benchmark for Solving Robotic Reaching Tasks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9939-5537","authenticated-orcid":false,"given":"Pierre","family":"Aumjaud","sequence":"first","affiliation":[]},{"given":"David","family":"McAuliffe","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8400-7079","authenticated-orcid":false,"given":"Francisco Javier","family":"Rodr\u00edguez-Lera","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4824-427X","authenticated-orcid":false,"given":"Philip","family":"Cardiff","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,3]]},"reference":[{"key":"22_CR1","unstructured":"Sutton, R., Barto, A.: Reinforcement Learning: An Introduction, 2nd edn. MIT Press, Cambridge (2018). \nhttp:\/\/www.incompleteideas.net\/book\/the-book-2nd.html"},{"issue":"7587","key":"22_CR2","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016). \nhttps:\/\/doi.org\/10.1038\/nature16961","journal-title":"Nature"},{"issue":"7540","key":"22_CR3","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015). \nhttps:\/\/doi.org\/10.1038\/nature14236","journal-title":"Nature"},{"key":"22_CR4","doi-asserted-by":"publisher","unstructured":"Deisenroth, M.P., Rasmussen, C.E.: PILCO: a model-based and data-efficient approach to policy search. In: Proceedings of the 28th International Conference on Machine Learning, pp. 465\u2013472 (2011). \nhttps:\/\/doi.org\/10.5555\/3104482.3104541","DOI":"10.5555\/3104482.3104541"},{"key":"22_CR5","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. In: 4th International Conference on Learning Representation (2016). \nhttp:\/\/arxiv.org\/abs\/1509.02971"},{"key":"22_CR6","unstructured":"Fujimoto, S., Van Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: 35th International Conference on Machine Learning, ICML 2018, vol. 4, pp. 2587\u20132601 (2018). \nhttp:\/\/arxiv.org\/abs\/1802.09477"},{"key":"22_CR7","unstructured":"Haarnoja, T., et al.: Soft actor-critic algorithms and applications. Computing Research Repository CoRR (2018). \nhttp:\/\/arxiv.org\/abs\/1812.05905"},{"key":"22_CR8","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning (2018). \nhttp:\/\/arxiv.org\/abs\/1801.01290"},{"key":"22_CR9","unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Advances in Neural Information Processing Systems, vol. 30, pp. 5048\u20135058 (2017). \nhttp:\/\/arxiv.org\/abs\/1707.01495"},{"key":"22_CR10","doi-asserted-by":"publisher","unstructured":"Devin, C., Gupta, A., Darrell, T., Abbeel, P., Levine, S.: Learning modular neural network policies for multi-task and multi-robot transfer. In: Proceedings - IEEE International Conference on Robotics and Automation, pp. 2169\u20132176 (2017). \nhttps:\/\/doi.org\/10.1109\/ICRA.2017.7989250","DOI":"10.1109\/ICRA.2017.7989250"},{"key":"22_CR11","unstructured":"Gupta, A., Devin, C., Liu, Y., Abbeel, P., Levine, S.: Learning invariant feature spaces to transfer skills with reinforcement learning. In: International Conference on Learning Representations (2017). \nhttp:\/\/arxiv.org\/abs\/1703.02949"},{"key":"22_CR12","unstructured":"Plappert, M., et al.: Multi-goal reinforcement learning: challenging robotics environments and request for research. Computing Research Repository CoRR (2018). \nhttp:\/\/arxiv.org\/abs\/1802.09464"},{"key":"22_CR13","unstructured":"Chen, T., Murali, A., Gupta, A.: Hardware conditioned policies for multi-robot transfer learning. In: 32nd Conference on Neural Information Processing Systems (NeurIPS) (2018). \nhttp:\/\/arxiv.org\/abs\/1811.09864"},{"key":"22_CR14","doi-asserted-by":"publisher","unstructured":"Rupam Mahmood, A., Korenkevych, D., Komer, B.J., Bergstra, J.: Setting up a reinforcement learning task with a real-world robot. In: IEEE International Conference on Intelligent Robots and Systems, pp. 4635\u20134640 (2018). \nhttps:\/\/doi.org\/10.1109\/IROS.2018.8593894","DOI":"10.1109\/IROS.2018.8593894"},{"key":"22_CR15","unstructured":"Tavakoli, A., Pardo, F., Kormushev, P.: Action branching architectures for deep reinforcement learning. In: 31st Conference on Neural Information Processing Systems (NIPS) (2017). \nhttp:\/\/arxiv.org\/abs\/1711.08946"},{"key":"22_CR16","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. Computing Research Repository CoRR (2016). \nhttps:\/\/arxiv.org\/abs\/1610.00633"},{"key":"22_CR17","unstructured":"Luo, S., Kasaei, H., Schomaker, L.: Accelerating reinforcement learning for reaching using continuous curriculum learning. In: International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138 (2020). \nhttps:\/\/ieeexplore.ieee.org\/document\/9207427"},{"key":"22_CR18","doi-asserted-by":"publisher","unstructured":"Pham, T.H., De Magistris, G., Tachibana, R.: OptLayer - practical constrained optimization for deep reinforcement learning in the real world. In: Proceedings - IEEE International Conference on Robotics and Automation, pp. 6236\u20136243 (2018). \nhttps:\/\/doi.org\/10.1109\/ICRA.2018.8460547","DOI":"10.1109\/ICRA.2018.8460547"},{"key":"22_CR19","unstructured":"Lucchi, M., Zindler, F., M\u00fchlbacher-Karrer, S., Pichler, H.: Robo-gym \u2013 an open source toolkit for distributed deep reinforcement learning on real and simulated robots. In: 2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS 2020) (2020). \nhttp:\/\/arxiv.org\/abs\/2007.02753"},{"key":"22_CR20","unstructured":"Pong, V., Gu, S., Dalal, M., Levine, S.: Temporal difference models: model-free deep RL for model-based control. In: 6th International Conference on Learning Representations (ICLR), pp. 1\u201314 (2018). \nhttps:\/\/arxiv.org\/abs\/1802.09081"},{"key":"22_CR21","unstructured":"Pinto, L., Mandalika, A., Hou, B., Srinivasa, S.: Sample-efficient learning of nonprehensile manipulation policies via physics-based informed state distributions. Computing Research Repository CoRR (2018). \nhttp:\/\/arxiv.org\/abs\/1810.10654"},{"key":"22_CR22","unstructured":"Trossen Robotics: \nhttps:\/\/www.trossenrobotics.com\/\n\n Accessed 07 Feb 2020"},{"key":"22_CR23","unstructured":"Quigley, M., et al.: ROS: an open-source robot operating system. In: Proceedings Open-Source Software workshop of the International Conference on Robotics and Automation (ICRA) (2009). \nhttp:\/\/ai.stanford.edu\/~mquigley\/papers\/icra2009-ros.pdf"},{"key":"22_CR24","unstructured":"Yang, B., Zhang, J., Pong, V., Levine, S., Jayaraman, D.: REPLAB: a reproducible low-cost arm benchmark platform for robotic learning. In: International Conference on Robotics and Automation (ICRA) (2019). \nhttp:\/\/arxiv.org\/abs\/1905.07447"},{"key":"22_CR25","unstructured":"Coumans, E., Bai, Y.: PyBullet, a Python Module for Physics Simulation for Games, Robotics and Machine Learning (2016\u20132019). \nhttp:\/\/pybullet.org"},{"key":"22_CR26","unstructured":"Brockman, G., et al.: OpenAI Gym (2016). \nhttp:\/\/gym.openai.com\/"},{"key":"22_CR27","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: 33rd International Conference on Machine Learning (ICML), vol. 48, pp. 1928\u20131937 (2016). \nhttps:\/\/arxiv.org\/abs\/1602.01783"},{"key":"22_CR28","unstructured":"Wu, Y., Mansimov, E., Liao, S., Grosse, R., Ba, J.: Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation. In: 31st Conference on Neural Information Processing Systems (NIPS) (2017). \nhttps:\/\/arxiv.org\/abs\/1708.05144"},{"key":"22_CR29","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. Computing Research Repository (CoRR) (2017). \nhttp:\/\/arxiv.org\/abs\/1707.06347"},{"key":"22_CR30","unstructured":"Schulman, J., Levine, S., Moritz, P., Jordan, M.I., Abbeel, P.: Trust region policy optimization. In: 31st International Conference on Machine Learning (ICML) (2015). \nhttp:\/\/arxiv.org\/abs\/1502.05477"},{"key":"22_CR31","unstructured":"Hill, A., et al.: Stable Baselines. GitHub repository (2018). \nhttps:\/\/github.com\/hill-a\/stable-baselines"},{"key":"22_CR32","doi-asserted-by":"publisher","unstructured":"Akiba, T., Sano, S., Yanase, T., Ohta, T., Koyama, M.: Optuna: A next-generation hyperparameter optimization framework. In: Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 2623\u20132631 (2019). \nhttps:\/\/doi.org\/10.1145\/3292500.3330701","DOI":"10.1145\/3292500.3330701"},{"key":"22_CR33","unstructured":"University College Dublin, UCD Research Office and IT Services. ResearchIT Sonic HPC cluster"},{"key":"22_CR34","doi-asserted-by":"publisher","unstructured":"Pathak, D., Agrawal, P., Efros, A.A., Darrell, T.: Curiosity-driven exploration by self-supervised prediction. In: IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops, pp. 488\u2013489 (2017). \nhttps:\/\/doi.org\/10.1109\/CVPRW.2017.70","DOI":"10.1109\/CVPRW.2017.70"}],"container-title":["Advances in Intelligent Systems and Computing","Advances in Physical Agents II"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-62579-5_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,2]],"date-time":"2020-11-02T06:12:25Z","timestamp":1604297545000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-62579-5_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11,3]]},"ISBN":["9783030625788","9783030625795"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-62579-5_22","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2020,11,3]]},"assertion":[{"value":"3 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"WAF","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop of Physical Agents","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Alcal\u00e1 de Henares, Madrid","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 November 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"waf2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.robesafe.uah.es\/waf2020","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}