{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,20]],"date-time":"2026-04-20T15:41:26Z","timestamp":1776699686326,"version":"3.51.2"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1109\/icra40945.2020.9196924","type":"proceedings-article","created":{"date-parts":[[2020,9,15]],"date-time":"2020-09-15T17:25:46Z","timestamp":1600190746000},"page":"4899-4905","source":"Crossref","is-referenced-by-count":55,"title":["Towards Safe Human-Robot Collaboration Using Deep Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Mohamed","family":"El-Shamouty","sequence":"first","affiliation":[{"name":"Fraunhofer IPA,Department of Robot and Assistive Systems"}]},{"given":"Xinyang","family":"Wu","sequence":"additional","affiliation":[{"name":"Fraunhofer IPA,Center for Cyber Cognitive Intelligence"}]},{"given":"Shanqi","family":"Yang","sequence":"additional","affiliation":[{"name":"Fraunhofer IPA,Department of Robot and Assistive Systems"}]},{"given":"Marcel","family":"Albus","sequence":"additional","affiliation":[{"name":"Fraunhofer IPA,Department of Robot and Assistive Systems"}]},{"given":"Marco 
F.","family":"Huber","sequence":"additional","affiliation":[{"name":"Fraunhofer IPA,Center for Cyber Cognitive Intelligence"}]}],"member":"263","reference":[{"key":"ref38","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017"},{"key":"ref33","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-66266-4_6"},{"key":"ref31","article-title":"Bayesian optimization with safety constraints: Safe and automatic parameter tuning in robotics","author":"berkenkamp","year":"2016"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref37","article-title":"SURREAL: Open-Source Reinforcement Learning Framework and Robot Manipulation Benchmark","author":"fan","year":"2018","journal-title":"Conference on Robot Learning"},{"key":"ref36","article-title":"Openai gym","author":"brockman","year":"2016"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref34","article-title":"Enhancing decision tree based interpretation of deep neural networks through L1-Orthogonal Regularization","author":"schaaf","year":"2017"},{"key":"ref10","author":"russell","year":"2016","journal-title":"Artificial Intelligence A Modern Approach"},{"key":"ref11","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref12","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement 
learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-008-0139-1"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ITA.2016.7888195"},{"key":"ref16","article-title":"Concrete problems in AI safety","author":"amodei","year":"2016"},{"key":"ref17","article-title":"Assuring the machine learning lifecycle: Desiderata, methods, and challenges","author":"ashmore","year":"2019"},{"key":"ref18","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref19","author":"markov","year":"2010","journal-title":"Theory of Algorithms"},{"key":"ref28","first-page":"1437","article-title":"A comprehensive survey on safe reinforcement learning","volume":"16","author":"garcía","year":"2015","journal-title":"Journal of Machine Learning Research"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ETFA.2017.8247648"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.15607\/RSS.2019.XV.039","article-title":"Differentiable algorithm networks for composable robot learning","author":"karkus","year":"2019","journal-title":"Robotics Science and Systems (RSS)"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-658-04682-8_21"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.cirp.2017.04.095"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460547"},{"key":"ref5","article-title":"Production-oriented product validation method as support for the reuse of production lines in the automotive industry","author":"stanev","year":"2009","journal-title":"International Conference on Changeable Agile Reconfigurable and Virtual Production 
(CARV)"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ADPRL.2013.6614995"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.069"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44733-8_43"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1561\/2300000052"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-53254-6"},{"key":"ref20","article-title":"An analysis of iso 26262: Using machine learning safely in automotive software","author":"salay","year":"2017"},{"key":"ref22","first-page":"22","article-title":"Constrained policy optimization","volume":"70","author":"achiam","year":"0"},{"key":"ref21","article-title":"Safe exploration in continuous action spaces","author":"dalal","year":"2018"},{"key":"ref24","first-page":"5392","article-title":"Hybrid reward architecture for reinforcement learning","author":"van seijen","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1145\/3197517.3201311","article-title":"Deepmimic: Example-guided deep reinforcement learning of physics-based character skills","volume":"37","author":"peng","year":"2018","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"ref26","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref25","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11694","article-title":"Deep reinforcement learning that matters","author":"henderson","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"}],"event":{"name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","location":"Paris, France","start":{"date-parts":[[2020,5,31]]},"end":{"date-parts":[[2020,8,31]]}},"container-title":["2020 IEEE International Conference on 
Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9187508\/9196508\/09196924.pdf?arnumber=9196924","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,18]],"date-time":"2022-11-18T08:59:07Z","timestamp":1668761947000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9196924\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icra40945.2020.9196924","relation":{},"subject":[],"published":{"date-parts":[[2020,5]]}}}