{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T11:42:14Z","timestamp":1778845334507,"version":"3.51.4"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030958916","type":"print"},{"value":"9783030958923","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-95892-3_41","type":"book-chapter","created":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T07:02:54Z","timestamp":1649314974000},"page":"532-550","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Robotic Arm Control and\u00a0Task Training Through Deep Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Andrea","family":"Franceschetti","sequence":"first","affiliation":[]},{"given":"Elisa","family":"Tosello","sequence":"additional","affiliation":[]},{"given":"Nicola","family":"Castaman","sequence":"additional","affiliation":[]},{"given":"Stefano","family":"Ghidoni","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,4,8]]},"reference":[{"issue":"3\u20134","key":"41_CR1","first-page":"229","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3\u20134), 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"key":"41_CR2","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M.I., Abbeel. P.: High-dimensional continuous control using generalized advantage estimation. In: Bengio, Y., LeCun, Y. (eds.) 4th International Conference on Learning Representations, ICLR 2016, Conference Track Proceedings, San Juan, Puerto Rico, 2\u20134 May 2016 (2016)"},{"key":"41_CR3","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, T., Riedmiller, M.: Deterministic policy gradient algorithms. In: Proceedings of the 31st International Conference on Machine Learning, ICML-14, pp. 387\u2013395 (2014)"},{"key":"41_CR4","unstructured":"Gu, S., Lillicrap, T., Sutskever, I., Levine, S.: Continuous deep q-learning with model-based acceleration. In: International Conference on Machine Learning, pp. 2829\u20132838 (2016)"},{"key":"41_CR5","doi-asserted-by":"crossref","unstructured":"Varin, P., Grossman, L., Kuindersma, S.: A comparison of action spaces for learning manipulation tasks. In: 2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 6015\u20136021 (2019)","DOI":"10.1109\/IROS40897.2019.8967946"},{"key":"41_CR6","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. CoRR, abs\/1707.06347 (2017)"},{"key":"41_CR7","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: Dy, J., Krause, A. (eds.) Proceedings of the 35th International Conference on Machine Learning, vol. 80 of Proceedings of Machine Learning Research, 10\u201315 July 2018, pp 1861\u20131870. PMLR (2018)"},{"key":"41_CR8","doi-asserted-by":"crossref","unstructured":"Ceola, F., Tosello, E., Tagliapietra, L., Nicola, G., Ghidoni, S.: Robot task planning via deep reinforcement learning: a tabletop object sorting application. In: 2019 IEEE International Conference on Systems, Man and Cybernetics (SMC), pp. 486\u2013492 (2019)","DOI":"10.1109\/SMC.2019.8914278"},{"key":"41_CR9","doi-asserted-by":"crossref","unstructured":"Nicola, G., Tagliapietra, L., Tosello, E., Navarin, N., Ghidoni, S., Menegatti, E.: Robotic object sorting via deep reinforcement learning: a generalized approach. In: 2020 29th IEEE International Conference on Robot and Human Interactive Communication (RO-MAN), pp. 1266\u20131273 (2020)","DOI":"10.1109\/RO-MAN47096.2020.9223484"},{"key":"41_CR10","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4613-1381-6","volume-title":"Explanation-Based Neural Network Learning: A Lifelong Learning Approach","author":"S Thrun","year":"1996","unstructured":"Thrun, S.: Explanation-Based Neural Network Learning: A Lifelong Learning Approach. Kluwer Academic Publishers, Norwell (1996)"},{"key":"41_CR11","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 3389\u20133396. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"41_CR12","unstructured":"Andrychowicz, M., et al.: Hindsight experience replay. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems, vol. 30, pp. 5055\u20135065 (2017)"},{"issue":"1","key":"41_CR13","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., Abbeel, P.: End-to-end training of deep visuomotor policies. J. Mach. Learn. Res. 17(1), 1334\u20131373 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"41_CR14","unstructured":"Chebotar, Y., Hausman, K., Zhang, M., Sukhatme, G., Schaal, S., Levine, S.: Combining model-based and model-free updates for trajectory-centric reinforcement learning. arXiv preprint arXiv:1703.03078 (2017)"},{"key":"41_CR15","unstructured":"Levine, S., Koltun, V.: Guided policy search. In: Proceedings of the 30th International Conference on International Conference on Machine Learning, ICML 2013, vol. 28, pp. III-1\u2013III-9. JMLR.org (2013)"},{"key":"41_CR16","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley (2014)"},{"key":"41_CR17","doi-asserted-by":"publisher","unstructured":"Kober J., Peters J.: Reinforcement learning in robotics: a survey. In: Wiering, M., van Otterlo, M. (eds.) Reinforcement Learning. Adaptation, Learning, and Optimization, vol. 12. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-27645-3_18","DOI":"10.1007\/978-3-642-27645-3_18"},{"key":"41_CR18","unstructured":"Kimura, H., Kobayashi, S.: Reinforcement learning for continuous action using stochastic gradient ascent. In: 5th Intelligent Autonomous Systems, pp. 288\u2013295 (1998)"},{"key":"41_CR19","unstructured":"Duan, Y., Chen, X., Houthooft, R., Schulman, J., Abbeel, P.: Benchmarking deep reinforcement learning for continuous control. In: Proceedings of the 33rd International Conference on International Conference on Machine Learning, ICML 2016, vol. 48, pp. 1329\u20131338 (2016)"},{"key":"41_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. CoRR, abs\/1412.6980 (2014)"},{"key":"41_CR21","unstructured":"Islam, R., Henderson, P., Gomrokchi, M., Precup, D.: Reproducibility of benchmarked deep reinforcement learning tasks for continuous control. arXiv preprint arXiv:1708.04133 (2017)"},{"key":"41_CR22","unstructured":"Quigley, M., et al.: ROS: an open-source robot operating system. In: ICRA Workshop on Open Source Software, Kobe, Japan, vol. 3, p. 5 (2009)"},{"key":"41_CR23","doi-asserted-by":"crossref","unstructured":"Olson, E.: AprilTag: a robust and flexible visual fiducial system. In: 2011 IEEE International Conference on Robotics and Automation (ICRA), pp. 3400\u20133407. IEEE (2011)","DOI":"10.1109\/ICRA.2011.5979561"},{"issue":"1","key":"41_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1080\/01691864.2020.1833752","volume":"35","author":"N Castaman","year":"2021","unstructured":"Castaman, N., et al.: RUR53: an unmanned ground vehicle for navigation, recognition, and manipulation. Adv. Robot. 35(1), 1\u201318 (2021)","journal-title":"Adv. Robot."},{"key":"41_CR25","unstructured":"de Freitas, E.P., et al.: Ontological concepts for information sharing in cloud robotics. J. Ambient Intell. Humaniz. Comput. (2020)"},{"key":"41_CR26","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1007\/978-3-319-48036-7_21","volume-title":"Intelligent Autonomous Systems 14","author":"E Tosello","year":"2017","unstructured":"Tosello, E., Fan, Z., Castro, A.G., Pagello, E.: Cloud-based task planning for smart robots. In: Chen, W., Hosoda, K., Menegatti, E., Shimizu, M., Wang, H. (eds.) IAS 2016. AISC, vol. 531, pp. 285\u2013300. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-48036-7_21"},{"key":"41_CR27","unstructured":"Tosello, E., Fan, Z., Pagello, E.: A semantic knowledge base for cognitive robotics manipulator. In: Workshop on Toward Intelligent Social Robots - Current Advances in Cognitive Robotics (2015)"},{"key":"41_CR28","unstructured":"Fan, Z., Tosello, E., Palmia, M., Pagello, E.: Applying semantic web technologies to multi-robot coordination. In: Workshop on New Research Frontiers for Intelligent Autonomous Systems, NRF-IAS-2014 (2014)"}],"container-title":["Lecture Notes in Networks and Systems","Intelligent Autonomous Systems 16"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-95892-3_41","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T07:17:09Z","timestamp":1649315829000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-95892-3_41"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030958916","9783030958923"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-95892-3_41","relation":{},"ISSN":["2367-3370","2367-3389"],"issn-type":[{"value":"2367-3370","type":"print"},{"value":"2367-3389","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"8 April 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IAS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Autonomous Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 June 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ias2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ias-16.com","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}