{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T21:52:03Z","timestamp":1730238723739,"version":"3.28.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,20]],"date-time":"2024-05-20T00:00:00Z","timestamp":1716163200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,20]],"date-time":"2024-05-20T00:00:00Z","timestamp":1716163200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,20]]},"DOI":"10.1109\/icdl61372.2024.10644266","type":"proceedings-article","created":{"date-parts":[[2024,8,27]],"date-time":"2024-08-27T17:19:46Z","timestamp":1724779186000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Learning While Sleeping: Integrating Sleep-Inspired Consolidation with Human Feedback Learning"],"prefix":"10.1109","author":[{"given":"Imene","family":"Tarakli","sequence":"first","affiliation":[{"name":"Sheffield Hallam University,Sheffield,United Kingdom"}]},{"given":"Alessandro","family":"Di Nuovo","sequence":"additional","affiliation":[{"name":"Sheffield Hallam University,Sheffield,United Kingdom"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2019.03087"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2007.09.009"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1037\/xge0000569"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2015.03.009"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/nature04286"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/nature02663"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pbio.1002263"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-042920-015547"},{"key":"ref9","first-page":"4028","article-title":"Iq-learn: Inverse soft-q learning for imitation","volume":"34","author":"Garg","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"Reinforcement learning: An introduction","year":"2018","author":"Sutton","key":"ref10"},{"key":"ref11","first-page":"2","article-title":"Algorithms for inverse reinforcement learning","volume":"1","author":"Ng","year":"2000","journal-title":"Icml"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1597735.1597738"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2016.7745140"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11485"},{"key":"ref15","first-page":"2285","article-title":"Interactive learning from policy-dependent human feedback","volume-title":"International Conference on Machine Learning","author":"MacGlashan"},{"key":"ref16","article-title":"Deep reinforcement learning from policy-dependent human feedback","author":"Arumugam","year":"2019","journal-title":"arXiv preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3389\/fnbot.2022.864380"},{"key":"ref18","article-title":"Replay across experiments: A natural extension of off-policy rl","author":"Tirumala","year":"2023","journal-title":"arXiv preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuron.2023.03.005"},{"volume-title":"Gymnasium","year":"2023","author":"Towers","key":"ref20"},{"key":"ref21","first-page":"738","article-title":"A dual representation framework for robot learning with human guidance","volume-title":"Conference on Robot Learning","author":"Zhang"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.cognition.2017.03.006"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref24","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning","author":"Haarnoja"},{"issue":"274","key":"ref25","first-page":"1","article-title":"Cleanrl: High-quality single-file implementations of deep reinforcement learning algorithms","volume":"23","author":"Huang","year":"2022","journal-title":"Journal of Machine Learning Research"},{"key":"ref26","article-title":"Maximum entropy rl (provably) solves some robust rl problems","author":"Eysenbach","year":"2021","journal-title":"arXiv preprint"}],"event":{"name":"2024 IEEE International Conference on Development and Learning (ICDL)","start":{"date-parts":[[2024,5,20]]},"location":"Austin, TX, USA","end":{"date-parts":[[2024,5,23]]}},"container-title":["2024 IEEE International Conference on Development and Learning (ICDL)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10644131\/10644157\/10644266.pdf?arnumber=10644266","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,3]],"date-time":"2024-09-03T05:30:34Z","timestamp":1725341434000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10644266\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,20]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/icdl61372.2024.10644266","relation":{},"subject":[],"published":{"date-parts":[[2024,5,20]]}}}