{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T13:28:29Z","timestamp":1768829309139,"version":"3.49.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/cdc40024.2019.9029503","type":"proceedings-article","created":{"date-parts":[[2020,3,13]],"date-time":"2020-03-13T00:43:11Z","timestamp":1584060191000},"page":"803-808","source":"Crossref","is-referenced-by-count":5,"title":["Deep Reinforcement Learning with Feedback-based Exploration"],"prefix":"10.1109","author":[{"given":"Jan","family":"Scholten","sequence":"first","affiliation":[]},{"given":"Daan","family":"Wout","sequence":"additional","affiliation":[]},{"given":"Carlos","family":"Celemin","sequence":"additional","affiliation":[]},{"given":"Jens","family":"Kober","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","article-title":"A distributional perspective on reinforcement learning","author":"bellemare","year":"2017","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref32","article-title":"Interactive learning with corrective feedback for policies based on deep neural networks","author":"dattari","year":"2018","journal-title":"Int Symp for Experimental Robotics (ISER)"},{"key":"ref31","article-title":"OpenAI gym","author":"brockman","year":"2016"},{"key":"ref30","article-title":"Actor-critic algorithms","author":"konda","year":"2000","journal-title":"Advances Neural Inform Processing Syst (NIPS)"},{"key":"ref35","article-title":"Learning Gaussian policies from corrective human feedback","author":"wout","year":"2019"},{"key":"ref34","article-title":"Distributional policy gradients","author":"barth-maron","year":"2018","journal-title":"Proc of the Int Conf on Learning Representations (ICLR)"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref11","article-title":"Reinforcement learning from simultaneous human and MDP reward","author":"knox","year":"2012","journal-title":"Int Conf Autonomous Agents and Multiagent Systems (AAMAS'05)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-018-9786-6"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref14","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"vecerik","year":"2017"},{"key":"ref15","article-title":"Asynchronous data aggregation for training end to end visual control networks","author":"monfort","year":"2017","journal-title":"Autonomous Agents and Multiagent Systems (AAMAS)"},{"key":"ref16","article-title":"Deep reinforcement learning from human 
preferences","author":"christiano","year":"2017","journal-title":"Advances Neural Inform Processing Syst (NIPS)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2011.6005223"},{"key":"ref18","article-title":"Combining manual feedback with subsequent MDP reward signals for reinforcement learning","author":"knox","year":"2010","journal-title":"Int Conf Autonomous Agents and Multiagent Systems (AAMAS'05)"},{"key":"ref19","article-title":"Policy shaping: Integrating human feedback with reinforcement learning","author":"griffith","year":"2013","journal-title":"Advances Neural Inform Processing Syst (NIPS)"},{"key":"ref28","article-title":"The importance of experience replay database composition in deep reinforcement learning","author":"de bruin","year":"2015","journal-title":"Deep Reinforcement Learning Workshop NIPS"},{"key":"ref4","article-title":"AI in supply chain and logistics","author":"camhi","year":"2018","journal-title":"Business Insider Intelligence"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.388"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC.2018.8569635"},{"key":"ref6","article-title":"Clinical data based optimal STI strategies for HIV: A reinforcement learning approach","author":"ernst","year":"2007","journal-title":"IEEE Conf on Decision and Control (CDC)"},{"key":"ref29","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc of the Int Conf on Learning Representations (ICLR)"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11694","article-title":"Deep reinforcement learning that matters","author":"henderson","year":"2018","journal-title":"Proc AAAI Conf Artificial Intelligence"},{"key":"ref8","article-title":"Economic costs of diabetes in the U.S. 
in 2017","year":"2018","journal-title":"Diabetes Care"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/sim.3720"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460907"},{"key":"ref9","article-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"hessel","year":"2018","journal-title":"Proc AAAI Conf Artif Intell (AAAI)"},{"key":"ref1","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref20","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"Proc of the International Conference on Machine Learning (ICML)"},{"key":"ref22","article-title":"Including uncertainty when learning from human corrections","author":"losey","year":"2018","journal-title":"Conference on Robot Learning (CoRL)"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref24","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"Proc of the Int Conf on Learning Representations (ICLR)"},{"key":"ref23","author":"thrun","year":"2005","journal-title":"Probabilistic Robotics"},{"key":"ref26","article-title":"Deep exploration via bootstrapped DQN","author":"osband","year":"2016","journal-title":"Advances Neural Inform Processing Syst (NIPS)"},{"key":"ref25","article-title":"Uncertainty in Deep Learning","author":"gal","year":"2016","journal-title":"PhD thesis"}],"event":{"name":"2019 IEEE 58th Conference on Decision and Control (CDC)","location":"Nice, France","start":{"date-parts":[[2019,12,11]]},"end":{"date-parts":[[2019,12,13]]}},"container-title":["2019 IEEE 58th Conference on Decision and Control (CDC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8977134\/9028853\/09029503.pdf?arnumber=9029503","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,28]],"date-time":"2023-09-28T11:00:44Z","timestamp":1695898844000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9029503\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/cdc40024.2019.9029503","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}