{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,12]],"date-time":"2026-04-12T00:16:18Z","timestamp":1775952978508,"version":"3.50.1"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,10,23]],"date-time":"2022-10-23T00:00:00Z","timestamp":1666483200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,10,23]]},"DOI":"10.1109\/iros47612.2022.9981286","type":"proceedings-article","created":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T19:38:15Z","timestamp":1672083495000},"page":"4048-4055","source":"Crossref","is-referenced-by-count":4,"title":["Analyzing and Overcoming Degradation in Warm-Start Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Benjamin","family":"Wexler","sequence":"first","affiliation":[{"name":"Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel"}]},{"given":"Elad","family":"Sarafian","sequence":"additional","affiliation":[{"name":"Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel"}]},{"given":"Sarit","family":"Kraus","sequence":"additional","affiliation":[{"name":"Bar-Ilan University,Department of Computer Science,Ramat-Gan,Israel"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-03051-4"},{"key":"ref3","author":"Berner","year":"2019","journal-title":"Dota 2 with large scale deep reinforcement learning"},{"key":"ref4","author":"Zhu","year":"2020","journal-title":"The ingredients of real-world robotic reinforcement learning"},{"key":"ref5","first-page":"5048","article-title":"Hindsight experience replay","author":"Andrychowicz","year":"2017","journal-title":"NeurIPS"},{"key":"ref6","first-page":"2161","article-title":"Guided deep reinforcement learning of control policies for dexterous human-robot interaction","volume-title":"ICRA","author":"Christen","year":"2019"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref8","author":"Vecerik","year":"2017","journal-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref10","author":"Goecks","year":"2019","journal-title":"Integrating behavior cloning and reinforcement learning for improved performance in dense and sparse reward environments"},{"key":"ref11","author":"Cheng","year":"2018","journal-title":"Fast policy learning through imitation and reinforcement"},{"key":"ref12","author":"Zhu","year":"2017","journal-title":"Effective warm start for the online actor-critic re-inforcement learning based mhealth intervention"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"key":"ref14","author":"Haarnoja","year":"2018","journal-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor"},{"key":"ref15","author":"Fujimoto","year":"2018","journal-title":"Addressing function approximation error in actor-critic methods"},{"key":"ref16","author":"Fujimoto","year":"2019","journal-title":"Benchmarking batch deep reinforcement learning algorithms"},{"key":"ref17","first-page":"2052","article-title":"Off-policy deep reinforcement learning without exploration","author":"Fujimoto","year":"2019","journal-title":"ICML"},{"key":"ref18","volume-title":"Stabilizing off-policy q-learning via bootstrapping error reduction","author":"Kumar"},{"key":"ref19","author":"Wu","year":"2019","journal-title":"Behavior regularized offline reinforcement learning"},{"key":"ref20","article-title":"Algaedice: Policy gradient from arbitrary experience","author":"Nachum","year":"2019","journal-title":"arXiv preprint"},{"key":"ref21","first-page":"1179","article-title":"Conservative q-learning for offline reinforcement learning","volume":"33","author":"Kumar","year":"2020","journal-title":"Advances in Neural In-formation Processing Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/396"},{"key":"ref23","author":"Levine","year":"2020","journal-title":"Offline reinforce-ment learning: Tutorial, review, and perspectives on open problems"},{"key":"ref24","author":"Jaques","year":"2019","journal-title":"Way off-policy batch deep reinforcement learning of implicit human preferences in dialog"},{"key":"ref25","first-page":"5774","article-title":"Offline re-inforcement learning with fisher divergence critic regularization","author":"Kostrikov","year":"2021","journal-title":"ICML"},{"key":"ref26","first-page":"5824","article-title":"Gra-dient surgery for multi-task learning","volume":"33","author":"Yu","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref27","article-title":"Learning the pareto front with hypernetworks","author":"Navon","year":"2020","journal-title":"arXiv preprint"},{"key":"ref28","author":"Brockman","year":"2016","journal-title":"Openai gym"},{"key":"ref29","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","volume":"99","author":"Sutton","year":"1999","journal-title":"NIPs"},{"key":"ref30","first-page":"2020","article-title":"Phasic policy gradient","author":"Cobbe","year":"2021","journal-title":"ICML"},{"key":"ref31","author":"Schulman","year":"2017","journal-title":"Proximal policy optimization algorithms"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1017\/9781108955652.016"},{"key":"ref33","first-page":"767","article-title":"Interference and generalization in temporal difference learning","author":"Bengio","year":"2020","journal-title":"ICML"},{"key":"ref34","article-title":"A kernel statistical test of independence","author":"Gretton","year":"2007","journal-title":"Nips"},{"key":"ref35","author":"Chan","year":"2019","journal-title":"Measuring the reliability of reinforcement learning algorithms"}],"event":{"name":"2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Kyoto, Japan","start":{"date-parts":[[2022,10,23]]},"end":{"date-parts":[[2022,10,27]]}},"container-title":["2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9981026\/9981028\/09981286.pdf?arnumber=9981286","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T04:30:57Z","timestamp":1706761857000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9981286\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,23]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/iros47612.2022.9981286","relation":{},"subject":[],"published":{"date-parts":[[2022,10,23]]}}}