{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T14:33:10Z","timestamp":1759847590892,"version":"3.28.0"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160686","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T13:20:56Z","timestamp":1688476856000},"page":"5751-5757","source":"Crossref","is-referenced-by-count":5,"title":["Uncertainty-Guided Active Reinforcement Learning with Bayesian Neural Networks"],"prefix":"10.1109","author":[{"given":"Xinyang","family":"Wu","sequence":"first","affiliation":[{"name":"Fraunhofer IPA,Cyber Cognitive Intelligence Department"}]},{"given":"Mohamed","family":"El-Shamouty","sequence":"additional","affiliation":[{"name":"Fraunhofer IPA,Robot and Assistive Systems Department"}]},{"given":"Christof","family":"Nitsche","sequence":"additional","affiliation":[{"name":"Fraunhofer IPA,Cyber Cognitive Intelligence Department"}]},{"given":"Marco F.","family":"Huber","sequence":"additional","affiliation":[{"name":"Fraunhofer IPA,Cyber Cognitive Intelligence Department"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Actor-critic algorithms","volume":"12","author":"konda","year":"1999","journal-title":"Advances in neural information processing systems"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/0370-2693(87)91197-X"},{"journal-title":"Theory of Algorithms","year":"2010","author":"markov","key":"ref12"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1984.4767596"},{"key":"ref15","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2015","journal-title":"ArXiv Preprint"},{"key":"ref37","article-title":"Training multilayer perceptrons with the ex-tended kalman algorithm","volume":"1","author":"singhal","year":"1988","journal-title":"Advances in neural information processing systems"},{"key":"ref14","article-title":"Policy gradi-ent methods for reinforcement learning with function approximation","volume":"12","author":"sutton","year":"1999","journal-title":"Advances in neural information processing systems"},{"key":"ref36","first-page":"1593","article-title":"The no-u-turn sampler: adaptively setting path lengths in hamiltonian monte carlo","volume":"15","author":"hoffman","year":"2014","journal-title":"J Mach Learn Res"},{"key":"ref31","article-title":"Stochastic variational inference","author":"hoffman","year":"2013","journal-title":"Journal of Machine Learning Research"},{"key":"ref30","volume":"4","author":"bishop","year":"2006","journal-title":"Pattern Recognition and Machine Learning"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref11"},{"journal-title":"Monte Carlo Sampling Methods using Markov Chains and their Applications","year":"1970","author":"hastings","key":"ref33"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1948.tb01338.x"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1063\/1.1699114"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.4103\/jfmpc.jfmpc_440_19"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2988642"},{"key":"ref17","article-title":"Data-efficient deep reinforcement learning for dexterous manipulation","author":"popov","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref39","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1002\/0471221546.ch2","article-title":"Parameter-based kalman filter training: Theory and implementation","author":"puskorius","year":"2001","journal-title":"Kalman Filtering and Neural Networks"},{"key":"ref16","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/BF00439421"},{"key":"ref19","article-title":"Sample efficient actor-critic with experience replay","author":"wang","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref18","first-page":"4344","article-title":"Learning by playing solving sparse reward tasks from scratch","author":"riedmiller","year":"2018","journal-title":"Int Conference on Machine Learning"},{"key":"ref24","first-page":"1929","article-title":"Dropout: A Simple Way to Prevent Neural Networks from Overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"Journal of Machine Learning Research"},{"key":"ref46","article-title":"Openai gym","author":"brockman","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref23","article-title":"Improving neural networks by preventing co-adaptation of feature detectors","author":"hinton","year":"2012","journal-title":"ArXiv Preprint"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref26","first-page":"207","article-title":"Deep gaussian processes","author":"damianou","year":"2013","journal-title":"Artificial Intelligence and Statistics"},{"key":"ref25","article-title":"Understanding dropout","volume":"26","author":"baldi","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref20","article-title":"Prioritized experience replay","author":"schaul","year":"2015","journal-title":"ArXiv Preprint"},{"journal-title":"Kalman Filtering and Neural Networks","year":"2004","author":"haykin","key":"ref42"},{"key":"ref41","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v37i8.26200","article-title":"Kalman bayesian neural networks for closed-form online learning","author":"wagner","year":"2023","journal-title":"Proceedings of the 37th AAAI Conference on Artificial Intelligence"},{"key":"ref22","first-page":"2825","article-title":"Scikit-learn: Machine learning in Python","volume":"12","author":"pedregosa","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196924"},{"key":"ref21","first-page":"5048","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref43","first-page":"7648","article-title":"Automatic curriculum learning through value disagreement","volume":"33","author":"zhang","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref28","article-title":"Simple and scalable predictive uncertainty estimation using deep ensembles","volume":"30","author":"lakshminarayanan","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref27","article-title":"Concrete dropout","volume":"30","author":"gal","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref29","first-page":"234","article-title":"Uncertainty in neural networks: Approximately bayesian ensembling","author":"pearce","year":"2020","journal-title":"Int Conf on Artificial Intelligence and Statistics"},{"key":"ref8","first-page":"1050","article-title":"Dropout as a bayesian approximation: Representing model uncertainty in deep learning","author":"gal","year":"2016","journal-title":"Int Conference on Machine Learning"},{"key":"ref7","article-title":"Uncertainty weighted actor-critic for offline reinforcement learning","author":"wu","year":"2021","journal-title":"ArXiv Preprint"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-01560-1"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"silver","year":"2016","journal-title":"Nature"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.2352\/ISSN.2470-1173.2017.19.AVM-023"},{"key":"ref40","first-page":"3179","article-title":"Bayesian perceptron: Towards fully bayesian neural networks","author":"huber","year":"2020","journal-title":"Proceedings of the 59th IEEE Conference on Decision and Control (CDC)"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2023,5,29]]},"location":"London, United Kingdom","end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160686.pdf?arnumber=10160686","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T13:29:51Z","timestamp":1690205391000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160686\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160686","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}