{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T03:10:44Z","timestamp":1778037044566,"version":"3.51.4"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100011443","name":"Guangdong Provincial Key Laboratory of Robotics and Intelligent Systems","doi-asserted-by":"publisher","award":["2022KSYS013"],"award-info":[{"award-number":["2022KSYS013"]}],"id":[{"id":"10.13039\/501100011443","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Intel Serv Robotics"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s11370-025-00668-0","type":"journal-article","created":{"date-parts":[[2026,1,4]],"date-time":"2026-01-04T01:16:39Z","timestamp":1767489399000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Reinforcement learning for precision grasping and safety-critical coordination in a robotic arm"],"prefix":"10.1007","volume":"19","author":[{"given":"Fawad","family":"Khan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianlun","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyong","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shahid Asad","family":"Ali","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6011-2598","authenticated-orcid":false,"given":"Weijun","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,3]]},"reference":[{"key":"668_CR1","doi-asserted-by":"crossref","unstructured":"Beyret B, Shafti A, Faisal AA (2019) Dot-to-dot: Explainable hierarchical reinforcement learning for robotic manipulation. In: 2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 5014\u20135019. IEEE","DOI":"10.1109\/IROS40897.2019.8968488"},{"issue":"3","key":"668_CR2","first-page":"135","volume":"15","author":"L Stan","year":"2020","unstructured":"Stan L, Nicolescu AF, Pup\u0103z\u0103 C (2020) Reinforcement learning for assembly robots: A review. Proceedings in Manufacturing Systems 15(3):135\u2013146","journal-title":"Proceedings in Manufacturing Systems"},{"key":"668_CR3","doi-asserted-by":"crossref","unstructured":"Chao YW, Paxton C, Xiang Y, Yang W, Sundaralingam B, Chen T, Murali A, Cakmak M, Fox D (2022) Handoversim: A simulation framework and benchmark for human-to-robot object handovers. In: 2022 International conference on robotics and automation (ICRA), IEEE, pp. 6941\u20136947","DOI":"10.1109\/ICRA46639.2022.9812302"},{"key":"668_CR4","unstructured":"Stone A, Xiao T, Lu Y, Gopalakrishnan K, Lee KH, Vuong Q, Wohlhart P, Kirmani S, Zitkovich B, Xia F, Finn C (2023) Open-world object manipulation using pre-trained vision-language models. Preprint at arXiv:2303.00905"},{"key":"668_CR5","doi-asserted-by":"crossref","unstructured":"Kendall A, Hawke J, Janz D, Mazur P, Reda D, Allen JM, Lam VD, Bewley A, Shah A (2019) Learning to drive in a day. In: 2019 international conference on robotics and automation (ICRA), IEEE, pp. 8248\u20138254","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"668_CR6","unstructured":"Abbasimoshaei A, Stein T, Rothe T, Kern TA (2020) Design and impedance control of a hydraulic robot for paralyzed people. In: 8th RSI international conference on robotics and mechatronics, ICRoM 2020"},{"key":"668_CR7","unstructured":"Richter F, Orosco RK, Yip MC (2019) Open-sourced reinforcement learning environments for surgical robotics. Preprint at arXiv:1903.02090"},{"key":"668_CR8","doi-asserted-by":"publisher","first-page":"409","DOI":"10.3390\/act13100409","volume":"13","author":"M Sadeghi","year":"2024","unstructured":"Sadeghi M, Abbasimoshaei A, Kitajima Borges JP, Kern TA (2024) Numerical and experimental study of a wearable exo-glove for telerehabilitation application using shape memory alloy actuators. Actuators 13:409","journal-title":"Actuators"},{"key":"668_CR9","unstructured":"Gamble C, Gao J (2018) Safety-first ai for autonomous data centre cooling and industrial control. DeepMind, August 17"},{"key":"668_CR10","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1016\/j.compeleceng.2019.07.019","volume":"78","author":"K Mason","year":"2019","unstructured":"Mason K, Grijalva S (2019) A review of reinforcement learning for autonomous building energy management. Comput Elect Eng 78:300\u2013312","journal-title":"Comput Elect Eng"},{"key":"668_CR11","unstructured":"Ray A, Achiam J, Amodei D (2019) Benchmarking safe exploration in deep reinforcement learning. Preprint at arXiv:1910.01708 7(1), 2"},{"key":"668_CR12","unstructured":"Wang L, Xiang Y, Yang W, Mousavian A, Fox D (2022) Goal-auxiliary actor-critic for 6d robotic grasping with point clouds. In: Conference on robot learning, PMLR, pp. 70\u201380"},{"key":"668_CR13","unstructured":"Peng XB, Coumans E, Zhang T, Lee TW, Tan J, Levine S (2020) Learning agile robotic locomotion skills by imitating animals. Preprint at arXiv:2004.00784"},{"key":"668_CR14","doi-asserted-by":"crossref","unstructured":"Wang L, Xiang Y, Fox D (2019) Manipulation trajectory optimization with online grasp synthesis and selection. Preprint at arXiv:1911.10280","DOI":"10.15607\/RSS.2020.XVI.033"},{"key":"668_CR15","doi-asserted-by":"crossref","unstructured":"Rajeswaran A, Kumar V, Gupta A, Vezzani G, Schulman J, Todorov E, Levine S (2017) Learning complex dexterous manipulation with deep reinforcement learning and demonstrations. Preprint at arXiv:1709.10087","DOI":"10.15607\/RSS.2018.XIV.049"},{"issue":"1","key":"668_CR16","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364919887447","volume":"39","author":"OM Andrychowicz","year":"2020","unstructured":"Andrychowicz OM, Baker B, Chociej M, Jozefowicz R, McGrew B, Pachocki J, Petron A, Plappert M, Powell G, Ray A (2020) Learning dexterous in-hand manipulation. Int J Robot Res 39(1):3\u201320","journal-title":"Int J Robot Res"},{"key":"668_CR17","unstructured":"Nagabandi A, Konolige K, Levine S, Kumar V (2020) Deep dynamics models for learning dexterous manipulation. In: Conference on robot learning, PMLR, pp. 1101\u20131112"},{"key":"668_CR18","unstructured":"Gallou\u00e9dec Q, Cazin N, Dellandr\u00e9a E, Chen L (2021) panda-gym: Open-source goal-conditioned environments for robotic learning. Preprint at arXiv:2106.13687"},{"key":"668_CR19","unstructured":"Chen T, Xu J, Agrawal P (2022) A system for general in-hand object re-orientation. In: Conference on robot learning, PMLR, pp. 297\u2013307"},{"key":"668_CR20","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1016\/j.procir.2022.05.055","volume":"107","author":"P Segura","year":"2022","unstructured":"Segura P, Lobato-Calleros O, Ram\u00edrez-Serrano A, Hern\u00e1ndez-Mart\u00ednez EG (2022) Safety assurance in human-robot collaborative systems: a survey in the manufacturing industry. Procedia CIRP 107:740\u2013745","journal-title":"Procedia CIRP"},{"key":"668_CR21","unstructured":"Sun Z, He S, Miao F, Zou S (2024) Constrained reinforcement learning under model mismatch. Preprint at arXiv:2405.01327"},{"key":"668_CR22","doi-asserted-by":"crossref","unstructured":"Ying C, Zhou X, Su H, Yan D, Chen N, Zhu J (2022) Towards safe reinforcement learning via constraining conditional value-at-risk. Preprint at arXiv:2206.04436","DOI":"10.24963\/ijcai.2022\/510"},{"key":"668_CR23","doi-asserted-by":"crossref","unstructured":"El-Shamouty M, Wu X, Yang S, Albus M, Huber MF (2020) Towards safe human-robot collaboration using deep reinforcement learning. In: 2020 IEEE international conference on robotics and automation (ICRA), IEEE, pp. 4899\u20134905","DOI":"10.1109\/ICRA40945.2020.9196924"},{"key":"668_CR24","doi-asserted-by":"crossref","unstructured":"Liu P, Zhang K, Tateo D, Jauhri S, Hu Z, Peters J, Chalvatzaki G (2023) Safe reinforcement learning of dynamic high-dimensional robotic tasks: navigation, manipulation, interaction. In: 2023 IEEE international conference on robotics and automation (ICRA), IEEE, pp. 9449\u20139456","DOI":"10.1109\/ICRA48891.2023.10161548"},{"key":"668_CR25","doi-asserted-by":"crossref","unstructured":"Zhu Y, Wang Z, Merel J, Rusu A, Erez T, Cabi S, Tunyasuvunakool S, Kram\u00e1r J, Hadsell R, de Freitas N, Heess N. (2018) Reinforcement and imitation learning for diverse visuomotor skills. Preprint at arXiv:1802.09564","DOI":"10.15607\/RSS.2018.XIV.009"},{"key":"668_CR26","doi-asserted-by":"crossref","unstructured":"Levine S, Wagener N, Abbeel P (2015) Learning contact-rich manipulation skills with guided policy search (2015). Preprint at arXiv:1501.05611","DOI":"10.1109\/ICRA.2015.7138994"},{"issue":"39","key":"668_CR27","first-page":"1","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine S, Finn C, Darrell T, Abbeel P (2016) End-to-end training of deep visuomotor policies. J Mach Learn Res 17(39):1\u201340","journal-title":"J Mach Learn Res"},{"key":"668_CR28","unstructured":"Deisenroth M, Rasmussen CE (2011) Pilco: a model-based and data-efficient approach to policy search. In: Proceedings of the 28th International conference on machine learning (ICML-11), pp. 465\u2013472"},{"key":"668_CR29","unstructured":"Chebotar Y, Hausman K, Zhang M, Sukhatme G, Schaal S, Levine S (2017) Combining model-based and model-free updates for trajectory-centric reinforcement learning. In: International conference on machine learning, PMLR, pp. 703\u2013711"},{"key":"668_CR30","unstructured":"Levine, S.: Reinforcement learning and control as probabilistic inference: tutorial and review. 2018. URL https:\/\/arxiv. org\/pdf\/1805.00909. pdf (1805)"},{"key":"668_CR31","doi-asserted-by":"crossref","unstructured":"Quillen D, Jang E, Nachum O, Finn C, Ibarz J, Levine S (2018) Deep reinforcement learning for vision-based robotic grasping: a simulated comparative evaluation of off-policy methods. In: 2018 IEEE international conference on robotics and automation (ICRA), IEEE, pp. 6284\u20136291","DOI":"10.1109\/ICRA.2018.8461039"},{"key":"668_CR32","doi-asserted-by":"crossref","unstructured":"Mart\u00edn-Mart\u00edn R, Lee MA, Gardner R, Savarese S, Bohg J, Garg A (2019) Variable impedance control in end-effector space: An action space for reinforcement learning in contact-rich tasks. In: 2019 IEEE\/RSJ international conference on intelligent robots and systems (IROS), IEEE pp. 1010\u20131017","DOI":"10.1109\/IROS40897.2019.8968201"},{"key":"668_CR33","doi-asserted-by":"crossref","unstructured":"Tobin J, Fong R, Ray A, Schneider J, Zaremba W, Abbeel P (2017) Domain randomization for transferring deep neural networks from simulation to the real world. In: 2017 IEEE\/RSJ international conference on intelligent robots and systems (IROS), IEEE, pp. 23\u201330","DOI":"10.1109\/IROS.2017.8202133"},{"key":"668_CR34","unstructured":"Carvalho J, Le AT, Jahr P, Sun Q, Urain J, Koert D, Peters J (2024) Grasp diffusion network: learning grasp generators from partial point clouds with diffusion models in so (3) xr3. Preprint at arXiv:2412.08398"},{"key":"668_CR35","doi-asserted-by":"crossref","unstructured":"Todorov E, Erez T, Tassa Y (2012) Mujoco: a physics engine for model-based control. In: 2012 IEEE\/RSJ international conference on intelligent robots and systems, IEEE, pp. 5026\u20135033","DOI":"10.1109\/IROS.2012.6386109"},{"key":"668_CR36","unstructured":"Coumans E, Bai Y (2016) Pybullet, a python module for physics simulation for games, robotics and machine learning"},{"key":"668_CR37","doi-asserted-by":"publisher","DOI":"10.1002\/9781118776353","volume-title":"Reliability of safety-critical systems: theory and applications","author":"M Rausand","year":"2014","unstructured":"Rausand M (2014) Reliability of safety-critical systems: theory and applications. Wiley"},{"key":"668_CR38","doi-asserted-by":"crossref","unstructured":"Salay R, Queiroz R, Czarnecki K (2017) An analysis of iso 26262: Using machine learning safely in automotive software. Preprint at arXiv:1709.02435","DOI":"10.4271\/2018-01-1075"},{"key":"668_CR39","volume-title":"Reinforcement learning: an introduction","author":"RS Sutton","year":"2018","unstructured":"Sutton RS, Barto AG (2018) Reinforcement learning: an introduction. MIT press"},{"key":"668_CR40","unstructured":"Dalal G, Dvijotham K, Vecerik M, Hester T, Paduraru C, Tassa Y (2018) Safe exploration in continuous action spaces. Preprint at arXiv:1801.08757"},{"key":"668_CR41","unstructured":"Achiam J, Held D, Tamar A, Abbeel P (2017) Constrained policy optimization. In: International conference on machine learning, PMLR, pp. 22\u201331"},{"key":"668_CR42","doi-asserted-by":"crossref","unstructured":"Henderson P, Islam R, Bachman P, Pineau J, Precup D, Meger D (2018) Deep reinforcement learning that matters. In: Proceedings of the AAAI conference on artificial intelligence, 32","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"668_CR43","doi-asserted-by":"crossref","unstructured":"Bacon PL, Harb J, Precup D (2017) The option-critic architecture. In: Proceedings of the AAAI conference on artificial intelligence, 31","DOI":"10.1609\/aaai.v31i1.10916"},{"key":"668_CR44","unstructured":"Vezhnevets AS, Osindero S, Schaul T, Heess N, Jaderberg M, Silver D, Kavukcuoglu K (2017) Feudal networks for hierarchical reinforcement learning. In: International conference on machine learning, PMLR, pp. 3540\u20133549"},{"key":"668_CR45","first-page":"278","volume":"99","author":"AY Ng","year":"1999","unstructured":"Ng AY, Harada D, Russell S (1999) Policy invariance under reward transformations: theory and application to reward shaping. ICML 99:278\u2013287","journal-title":"ICML"},{"key":"668_CR46","unstructured":"Kalashnikov D, Irpan A, Pastor P, Ibarz J, Herzog A, Jang E, Quillen D, Holly E, Kalakrishnan M, Vanhoucke V, Levine S (2018) Scalable deep reinforcement learning for vision-based robotic manipulation. In: Conference on robot learning, PMLR, pp. 651\u2013673"},{"key":"668_CR47","doi-asserted-by":"publisher","first-page":"15281","DOI":"10.52202\/068431-1112","volume":"35","author":"A Gupta","year":"2022","unstructured":"Gupta A, Pacchiano A, Zhai Y, Kakade S, Levine S (2022) Unpacking reward shaping: understanding the benefits of reward engineering on sample complexity. Adv Neural Inf Process Syst 35:15281\u201315295","journal-title":"Adv Neural Inf Process Syst"},{"key":"668_CR48","doi-asserted-by":"crossref","unstructured":"Ranjan A, Agrawal S, Jain A, Jagtap P, Kolathaya S (2024) Barrier functions inspired reward shaping for reinforcement learning. In: 2024 IEEE International conference on robotics and automation (ICRA), IEEE, pp. 10807\u201310813","DOI":"10.1109\/ICRA57147.2024.10610391"},{"key":"668_CR49","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. In: Proceedings of the 34th international conference on machine learning, PMLR, pp. 1\u201312"},{"key":"668_CR50","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2024.102792","volume":"90","author":"T Li","year":"2024","unstructured":"Li T, Yan Y, Yu C, An J, Wang Y, Chen G (2024) A comprehensive review of robot intelligent grasping based on tactile perception. Robot Comput Integr Manuf 90:102792","journal-title":"Robot Comput Integr Manuf"},{"key":"668_CR51","doi-asserted-by":"crossref","unstructured":"Ding Z, Tsai YY, Lee WW, Huang B (2021) Sim-to-real transfer for robotic manipulation with tactile sensory. In: 2021 IEEE\/RSJ International conference on intelligent robots and systems (IROS), IEEE, pp. 6778\u20136785","DOI":"10.1109\/IROS51168.2021.9636259"},{"key":"668_CR52","unstructured":"Romero J, Tzionas D, Black MJ (2022) Embodied hands: modeling and capturing hands and bodies together. Preprint at arXiv:2201.02610"},{"key":"668_CR53","doi-asserted-by":"publisher","first-page":"1340","DOI":"10.1109\/TIP.2022.3141258","volume":"31","author":"Z Yu","year":"2022","unstructured":"Yu Z, Shen D, Jin Z, Huang J, Cai D, Hua X-S (2022) Progressive transfer learning. IEEE Trans Image Process 31:1340\u20131348","journal-title":"IEEE Trans Image Process"}],"container-title":["Intelligent Service Robotics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11370-025-00668-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11370-025-00668-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11370-025-00668-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T02:16:09Z","timestamp":1778033769000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11370-025-00668-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":53,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["668"],"URL":"https:\/\/doi.org\/10.1007\/s11370-025-00668-0","relation":{},"ISSN":["1861-2776","1861-2784"],"issn-type":[{"value":"1861-2776","type":"print"},{"value":"1861-2784","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1]]},"assertion":[{"value":"17 September 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"All authors have given their consent for the publication of this manuscript.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"16"}}