{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T14:30:42Z","timestamp":1774449042871,"version":"3.50.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T00:00:00Z","timestamp":1764892800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T00:00:00Z","timestamp":1764892800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key R&D Program of China","doi-asserted-by":"crossref","award":["2023YFB4705002"],"award-info":[{"award-number":["2023YFB4705002"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Guangdong Provincial Key Laboratory of Construction Robotics and Intelligent Construction","award":["2022KSYS013"],"award-info":[{"award-number":["2022KSYS013"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Intell Robot Appl"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s41315-025-00507-6","type":"journal-article","created":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T08:29:28Z","timestamp":1764923368000},"page":"411-435","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Safe reinforcement learning for vision-based robotic manipulation in human-centered environments"],"prefix":"10.1007","volume":"10","author":[{"given":"Fawad","family":"Khan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyong","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianlun","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiao","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunduan","family":"Cui","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weijun","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,5]]},"reference":[{"key":"507_CR1","unstructured":"Kalashnikov, D., Irpan, A., Pastor, P., Ibarz, J., Herzog, A., Jang, E., Quillen, D., Holly, E., Kalakrishnan, M., Vanhoucke, V., et al.: Scalable deep reinforcement learning for vision-based robotic manipulation. In: Conference on Robot Learning, pp. 651\u2013673 (2018). PMLR"},{"key":"507_CR2","doi-asserted-by":"crossref","unstructured":"Simeone, D., Toldo, I., Cursi, S.: Operational scenarios simulation to support building design: a hospital design case study. In: Modelling and Simulation for Autonomous Systems: First International Workshop, MESAS 2014, Rome, Italy, May 5-6, 2014, Revised Selected Papers 1, pp. 127\u2013137 (2014). Springer","DOI":"10.1007\/978-3-319-13823-7_12"},{"key":"507_CR3","doi-asserted-by":"crossref","unstructured":"Kendall, A., Hawke, J., Janz, D., Mazur, P., Reda, D., Allen, J.-M., Lam, V.-D., Bewley, A., Shah, A.: Learning to drive in a day. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 8248\u20138254 (2019). IEEE","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"507_CR4","unstructured":"Richter, F., Orosco, R.K., Yip, M.C.: Open-sourced reinforcement learning environments for surgical robotics. arXiv preprint arXiv:1903.02090 (2019)"},{"key":"507_CR5","unstructured":"Gamble, C., Gao, J.: Safety-first ai for autonomous data centre cooling and industrial control. DeepMind, August 17 (2018)"},{"key":"507_CR6","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1016\/j.compeleceng.2019.07.019","volume":"78","author":"K Mason","year":"2019","unstructured":"Mason, K., Grijalva, S.: A review of reinforcement learning for autonomous building energy management. Computers & Electrical Engineering 78, 300\u2013312 (2019)","journal-title":"Computers & Electrical Engineering"},{"key":"507_CR7","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT press, ??? (2018)"},{"key":"507_CR8","unstructured":"Ray, A., Achiam, J., Amodei, D.: Benchmarking safe exploration in deep reinforcement learning. arXiv preprint arXiv:1910.01708 7(1), 2 (2019)"},{"key":"507_CR9","unstructured":"Peng, X.B., Coumans, E., Zhang, T., Lee, T.-W., Tan, J., Levine, S.: Learning agile robotic locomotion skills by imitating animals. arXiv preprint arXiv:2004.00784 (2020)"},{"issue":"1","key":"507_CR10","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364919887447","volume":"39","author":"OM Andrychowicz","year":"2020","unstructured":"Andrychowicz, O.M., Baker, B., Chociej, M., Jozefowicz, R., McGrew, B., Pachocki, J., Petron, A., Plappert, M., Powell, G., Ray, A., et al.: Learning dexterous in-hand manipulation. The International Journal of Robotics Research 39(1), 3\u201320 (2020)","journal-title":"The International Journal of Robotics Research"},{"key":"507_CR11","unstructured":"Nagabandi, A., Konolige, K., Levine, S., Kumar, V.: Deep dynamics models for learning dexterous manipulation. In: Conference on Robot Learning, pp. 1101\u20131112 (2020). PMLR"},{"key":"507_CR12","doi-asserted-by":"crossref","unstructured":"Chao, Y.-W., Paxton, C., Xiang, Y., Yang, W., Sundaralingam, B., Chen, T., Murali, A., Cakmak, M., Fox, D.: Handoversim: A simulation framework and benchmark for human-to-robot object handovers. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 6941\u20136947 (2022). IEEE","DOI":"10.1109\/ICRA46639.2022.9812302"},{"issue":"9","key":"507_CR13","doi-asserted-by":"publisher","first-page":"2419","DOI":"10.1007\/s10994-021-05961-4","volume":"110","author":"G Dulac-Arnold","year":"2021","unstructured":"Dulac-Arnold, G., Levine, N., Mankowitz, D.J., Li, J., Paduraru, C., Gowal, S., Hester, T.: Challenges of real-world reinforcement learning: definitions, benchmarks and analysis. Mach. Learn. 110(9), 2419\u20132468 (2021)","journal-title":"Mach. Learn."},{"key":"507_CR14","unstructured":"Wang, L., Xiang, Y., Yang, W., Mousavian, A., Fox, D.: Goal-auxiliary actor-critic for 6d robotic grasping with point clouds. In: Conference on Robot Learning, pp. 70\u201380 (2022). PMLR"},{"key":"507_CR15","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiang, Y., Fox, D.: Manipulation trajectory optimization with online grasp synthesis and selection. arXiv preprint arXiv:1911.10280 (2019)","DOI":"10.15607\/RSS.2020.XVI.033"},{"key":"507_CR16","first-page":"399","volume":"19","author":"C Guestrin","year":"2003","unstructured":"Guestrin, C., Koller, D., Parr, R., Venkataraman, S.: Efficient solution algorithms for factored mdps. J. Art. Int. Research 19, 399\u2013468 (2003)","journal-title":"J. Art. Int. Research"},{"key":"507_CR17","unstructured":"Zhou, A., Kumar, V., Finn, C., Rajeswaran, A.: Policy architectures for compositional generalization in control. arXiv preprint arXiv:2203.05960 (2022)"},{"key":"507_CR18","unstructured":"Lin, B., Bouneffouf, D., Rish, I.: A survey on compositional generalization in applications. arXiv preprint arXiv:2302.01067 (2023)"},{"key":"507_CR19","unstructured":"Zadaianchuk, A., Seitzer, M., Martius, G.: Self-supervised visual reinforcement learning with object-centric representations. arXiv preprint arXiv:2011.14381 (2020)"},{"key":"507_CR20","unstructured":"Hans, A., Schneega\u00df, D., Sch\u00e4fer, A.M., Udluft, S.: Safe exploration for reinforcement learning. In: ESANN, pp. 143\u2013148 (2008)"},{"issue":"1","key":"507_CR21","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/0167-6377(96)00003-X","volume":"19","author":"M Haviv","year":"1996","unstructured":"Haviv, M.: On constrained markov decision processes. Operations Research Letters 19(1), 25\u201328 (1996)","journal-title":"Operations Research Letters"},{"issue":"39","key":"507_CR22","first-page":"1","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., Abbeel, P.: End-to-end training of deep visuomotor policies. J. Mach. Learn. Res. 17(39), 1\u201340 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"507_CR23","doi-asserted-by":"crossref","unstructured":"Yarats, D., Zhang, A., Kostrikov, I., Amos, B., Pineau, J., Fergus, R.: Improving sample efficiency in model-free reinforcement learning from images. In: Proceedings of the Aaai Conference on Artificial Intelligence, vol. 35, pp. 10674\u201310681 (2021)","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"507_CR24","unstructured":"Hafner, D., Pasukonis, J., Ba, J., Lillicrap, T.: Mastering diverse domains through world models. arXiv preprint arXiv:2301.04104 (2023)"},{"key":"507_CR25","unstructured":"Mambelli, D., Tr\u00e4uble, F., Bauer, S., Sch\u00f6lkopf, B., Locatello, F.: Compositional multi-object reinforcement learning with linear relation networks. arXiv preprint arXiv:2201.13388 (2022)"},{"key":"507_CR26","unstructured":"Gmelin, K., Bahl, S., Mendonca, R., Pathak, D.: Efficient rl via disentangled environment and agent representations. arXiv preprint arXiv:2309.02435 (2023)"},{"key":"507_CR27","doi-asserted-by":"crossref","unstructured":"Li, R., Jabri, A., Darrell, T., Agrawal, P.: Towards practical multi-object manipulation using relational reinforcement learning. In: 2020 Ieee International Conference on Robotics and Automation (icra), pp. 4051\u20134058 (2020). IEEE","DOI":"10.1109\/ICRA40945.2020.9197468"},{"key":"507_CR28","unstructured":"Sanchez-Gonzalez, A., Heess, N., Springenberg, J.T., Merel, J., Riedmiller, M., Hadsell, R., Battaglia, P.: Graph networks as learnable physics engines for inference and control. In: International Conference on Machine Learning, pp. 4470\u20134479 (2018). PMLR"},{"key":"507_CR29","first-page":"24170","volume":"35","author":"C Sancaktar","year":"2022","unstructured":"Sancaktar, C., Blaes, S., Martius, G.: Curious exploration via structured world models yields zero-shot object manipulation. Adv. Neural. Inf. Process. Syst. 35, 24170\u201324183 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"507_CR30","unstructured":"Watters, N., Matthey, L., Bosnjak, M., Burgess, C.P., Lerchner, A.: Cobra: Data-efficient model-based rl through unsupervised object discovery and curiosity-driven exploration. arXiv preprint arXiv:1905.09275 (2019)"},{"key":"507_CR31","doi-asserted-by":"crossref","unstructured":"Heravi, N., Wahid, A., Lynch, C., Florence, P., Armstrong, T., Tompson, J., Sermanet, P., Bohg, J., Dwibedi, D.: Visuomotor control in multi-object scenes using object-aware representations. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 9515\u20139522 (2023). IEEE","DOI":"10.1109\/ICRA48891.2023.10160888"},{"key":"507_CR32","unstructured":"Yoon, J., Wu, Y.-F., Bae, H., Ahn, S.: An investigation into pre-training object-centric representations for reinforcement learning. arXiv preprint arXiv:2302.04419 (2023)"},{"key":"507_CR33","first-page":"11525","volume":"33","author":"F Locatello","year":"2020","unstructured":"Locatello, F., Weissenborn, D., Unterthiner, T., Mahendran, A., Heigold, G., Uszkoreit, J., Dosovitskiy, A., Kipf, T.: Object-centric learning with slot attention. Adv. Neural. Inf. Process. Syst. 33, 11525\u201311538 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"507_CR34","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"507_CR35","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: Mujoco: A physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033 (2012). IEEE","DOI":"10.1109\/IROS.2012.6386109"},{"key":"507_CR36","unstructured":"Coumans, E., Bai, Y.: Pybullet, a python module for physics simulation for games, robotics and machine learning (2016)"},{"key":"507_CR37","unstructured":"Carvalho, J., Le, A.T., Jahr, P., Sun, Q., Urain, J., Koert, D., Peters, J.: Grasp diffusion network: Learning grasp generators from partial point clouds with diffusion models in so (3) xr3. arXiv preprint arXiv:2412.08398 (2024)"},{"key":"507_CR38","doi-asserted-by":"crossref","unstructured":"Beyret, B., Shafti, A., Faisal, A.A.: Dot-to-dot: Explainable hierarchical reinforcement learning for robotic manipulation. In: 2019 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 5014\u20135019 (2019). IEEE","DOI":"10.1109\/IROS40897.2019.8968488"},{"key":"507_CR39","first-page":"278","volume":"99","author":"AY Ng","year":"1999","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: Theory and application to reward shaping. Icml 99, 278\u2013287 (1999)","journal-title":"Icml"},{"key":"507_CR40","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Louradour, J., Collobert, R., Weston, J.: Curriculum learning. In: Proceedings of the 26th Annual International Conference on Machine Learning, pp. 41\u201348 (2009)","DOI":"10.1145\/1553374.1553380"},{"key":"507_CR41","unstructured":"Garc\u00eda, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. Journal of Machine Learning Research (2015)"},{"key":"507_CR42","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1613\/jair.1190","volume":"19","author":"E Wiewiora","year":"2003","unstructured":"Wiewiora, E.: Potential-based shaping and q-value initialization are equivalent. Journal of Artificial Intelligence Research 19, 205\u2013208 (2003)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"507_CR43","unstructured":"Irshad, M.Z.: Learning 3d robotics perception using inductive priors. arXiv preprint arXiv:2405.20364 (2024)"},{"key":"507_CR44","unstructured":"Andrychowicz, M., Wolski, F., Ray, A., Schneider, J., Fong, R., Welinder, P., McGrew, B., Tobin, J., Pieter\u00a0Abbeel, O., Zaremba, W.: Hindsight experience replay. Advances in neural information processing systems 30 (2017)"},{"key":"507_CR45","unstructured":"Achiam, J., Held, D., Tamar, A., Abbeel, P.: Constrained policy optimization. In: International Conference on Machine Learning, pp. 22\u201331 (2017). PMLR"},{"issue":"11","key":"507_CR46","doi-asserted-by":"publisher","first-page":"1238","DOI":"10.1177\/0278364913495721","volume":"32","author":"J Kober","year":"2013","unstructured":"Kober, J., Bagnell, J.A., Peters, J.: Reinforcement learning in robotics: A survey. The International Journal of Robotics Research 32(11), 1238\u20131274 (2013)","journal-title":"The International Journal of Robotics Research"},{"key":"507_CR47","unstructured":"Mahmood, A.R., Korenkevych, D., Vasan, G., Ma, W., Bergstra, J.: Benchmarking reinforcement learning algorithms on real-world robots. In: Conference on Robot Learning, pp. 561\u2013591 (2018). PMLR"},{"key":"507_CR48","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. In: Proceedings of the 34th International Conference on Machine Learning, pp. 1\u201312 (2017). PMLR"},{"key":"507_CR49","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870 (2018). PMLR"},{"issue":"4","key":"507_CR50","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3528223.3530182","volume":"41","author":"J Romero","year":"2022","unstructured":"Romero, J., Tzionas, D., Black, M.J.: Embodied hands: Modeling and capturing hands and bodies together. ACM Transactions on Graphics 41(4), 1\u201317 (2022)","journal-title":"ACM Transactions on Graphics"},{"issue":"11","key":"507_CR51","doi-asserted-by":"publisher","first-page":"7178","DOI":"10.1109\/LRA.2023.3313063","volume":"8","author":"P Xie","year":"2023","unstructured":"Xie, P., Chen, R., Chen, S., Qin, Y., Xiang, F., Sun, T., Xu, J., Wang, G., Su, H.: Part-guided 3d rl for sim2real articulated object manipulation. IEEE Robotics and Automation Letters 8(11), 7178\u20137185 (2023)","journal-title":"IEEE Robotics and Automation Letters"}],"container-title":["International Journal of Intelligent Robotics and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41315-025-00507-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41315-025-00507-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41315-025-00507-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T13:07:40Z","timestamp":1774444060000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s41315-025-00507-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,5]]},"references-count":51,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["507"],"URL":"https:\/\/doi.org\/10.1007\/s41315-025-00507-6","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-6736564\/v1","asserted-by":"object"}]},"ISSN":["2366-5971","2366-598X"],"issn-type":[{"value":"2366-5971","type":"print"},{"value":"2366-598X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,5]]},"assertion":[{"value":"24 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"All authors have given their consent for the publication of this manuscript.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Materials availability"}},{"value":"The primary codebase used in this study is publicly available on GitHub\n                      \n                      and has also been discussed in the reference section (Ray et\u00a0al.\n                      \n                      ). Specific implementations and modifications used in this study will be publicly available upon publication, but can also be provided upon request.","order":6,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code availability"}}]}}