{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T11:12:32Z","timestamp":1762254752351,"version":"3.37.3"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,4]],"date-time":"2023-12-04T00:00:00Z","timestamp":1701648000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,4]],"date-time":"2023-12-04T00:00:00Z","timestamp":1701648000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,4]]},"DOI":"10.1109\/robio58561.2023.10355039","type":"proceedings-article","created":{"date-parts":[[2023,12,22]],"date-time":"2023-12-22T19:20:45Z","timestamp":1703272845000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["METREE: Max-Entropy Exploration with Random Encoding for Efficient RL with Human Preferences"],"prefix":"10.1109","author":[{"given":"Isabel Y.N","family":"Guan","sequence":"first","affiliation":[{"name":"Peking University,School of Software and Microelectronics,Beijing,China,100871"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin","family":"Liu","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gary","family":"Zhang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University,School of Mechanical and Aerospace Engineering,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Estella","family":"Zhao","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenzhong","family":"Jia","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology (SUSTech),Department of Mechanical and Energy Engineering,Shenzhen,China,518055"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202141"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref3","article-title":"Concrete problems in AI safety","author":"Amodei","year":"2016","journal-title":"ArXiv Prepr. ArXiv160606565"},{"article-title":"Deep reinforcement learning from human preferences","year":"2017","author":"Christiano","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636020"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2010.5649089"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1201\/9781351251389-4"},{"key":"ref8","first-page":"2","article-title":"Algorithms for inverse reinforcement learning","volume-title":"Icml","author":"Ng"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461249"},{"article-title":"Deep reinforcement learning from human preferences","year":"2023","author":"Christiano","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/1597735.1597738"},{"journal-title":"Reward learning from human preferences and demonstrations in Atari","key":"ref12"},{"article-title":"PEBBLE: Feedback-Efficient Interactive Reinforcement Learning via Relabeling Experience and Unsupervised Pre-training","volume-title":"the International Conference on Machine Learning","author":"Lee","key":"ref13"},{"article-title":"Reward Uncertainty for Exploration in Preference-based Reinforcement Learning","volume-title":"Deep RL Workshop NeurIPS 2021","author":"Liang","key":"ref14"},{"key":"ref15","article-title":"A Bayesian Approach for Policy Learning from Trajectory Preference Queries","volume-title":"Advances in Neural Information Processing Systems","author":"Wilson","year":"2012"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2020.xvi.041"},{"key":"ref17","first-page":"5062","article-title":"Self-supervised exploration via disagreement","volume-title":"International conference on machine learning","author":"Pathak"},{"journal-title":"Sutton & Barto Book: Reinforcement Learning: An Introduction","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.32657\/10356\/90191"},{"issue":"5","key":"ref21","first-page":"562","article-title":"Automatic gain tuning method of a quad-rotor geometric attitude controller using A3C","volume":"14","author":"Lee","year":"2017","journal-title":"Int. J. Automation Comput."},{"article-title":"Policy distillation","year":"2015","author":"Colmenarejo","key":"ref22"},{"article-title":"Playing hard exploration games by watching YouTube","year":"2018","author":"Aytar","key":"ref23"},{"key":"ref24","article-title":"Learning to poke by poking: Experiential learning of intuitive physics","volume":"29","author":"Agrawal","year":"2016","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref25","first-page":"7105","article-title":"Meta-reinforcement learning with latent variable Gaussian processes","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"Xie"},{"key":"ref26","first-page":"3769","article-title":"Sim-to-real transfer learning using robustified controllers in robotic tasks","volume-title":"2017 IEEE\/RSJ International Conference on Intelligent Robots and Systems","author":"Hadsell"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"article-title":"Transfer from simulation to real world through learning deep inverse dynamics model","year":"2016","author":"Christiano","key":"ref28"},{"article-title":"PyBullet Gymperium","year":"2018","author":"Zhang","key":"ref29"},{"key":"ref30","first-page":"1230","article-title":"Robust visual servoing with deep reinforcement learning and its application to autonomous drones","volume-title":"Proceedings of the 20th International Conference on Autonomous Agents and Multiagent Systems","author":"Tanaka"},{"key":"ref31","article-title":"End-to-end trajectory optimization via deep inverse reinforcement learning","author":"Wang","year":"2019","journal-title":"Robotics: Science and Systems (RSS)"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1162\/106365602320169811"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/icra.2016.7487170"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"article-title":"OpenAI Gym","year":"2016","author":"Brockman","key":"ref36"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/s10472-015-9463-9"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925881"},{"key":"ref39","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume-title":"Proceedings of the 34th International Conference on Machine Learning","volume":"70","author":"Finn"},{"article-title":"Unsupervised representation learning with deep convolutional generative adversarial networks","year":"2015","author":"Radford","key":"ref40"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.009"},{"article-title":"Multi-agent reinforcement learning in parameterized action spaces","year":"2018","author":"Plappert","key":"ref42"},{"volume-title":"Nonlinear systems","year":"2002","author":"Khalil","key":"ref43"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-015-9479-3"},{"issue":"9","key":"ref45","first-page":"1044","article-title":"Adaptive sliding mode control with PID sliding surface for trajectory tracking of robotic manipulators","volume":"32","author":"Lii","year":"2013","journal-title":"Int. J. Rob. Res."}],"event":{"name":"2023 IEEE International Conference on Robotics and Biomimetics (ROBIO)","start":{"date-parts":[[2023,12,4]]},"location":"Koh\u00a0Samui, Thailand","end":{"date-parts":[[2023,12,9]]}},"container-title":["2023 IEEE International Conference on Robotics and Biomimetics (ROBIO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10354348\/10354529\/10355039.pdf?arnumber=10355039","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T22:39:05Z","timestamp":1705099145000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10355039\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,4]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/robio58561.2023.10355039","relation":{},"subject":[],"published":{"date-parts":[[2023,12,4]]}}}