{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T04:45:51Z","timestamp":1729658751563,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1109\/ijcnn48605.2020.9207388","type":"proceedings-article","created":{"date-parts":[[2020,9,30]],"date-time":"2020-09-30T00:40:33Z","timestamp":1601426433000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Meta-Reward Model Based on Trajectory Data with k-Nearest Neighbors Method"],"prefix":"10.1109","author":[{"given":"Xiaohui","family":"Zhu","sequence":"first","affiliation":[]},{"given":"Toshiharu","family":"Sugawara","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1967.1053964"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11757","article-title":"Deep q-learning from 
demonstrations","author":"hester","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"article-title":"Training Agent for First-Person Shooter Game with Actor-Critic Curriculum Learning","year":"0","author":"wu","key":"ref30"},{"key":"ref34","article-title":"Learning to reinforcement learn","author":"wang","year":"2017","journal-title":"CogSci"},{"key":"ref10","first-page":"475","article-title":"Reinforcement learning from simultaneous human and MDP reward","author":"knox","year":"2012","journal-title":"AAMAS"},{"article-title":"Agent-agnostic human-in-the-loop reinforcement learning","year":"2017","author":"abel","key":"ref11"},{"key":"ref12","first-page":"4299","article-title":"Deep reinforcement learning from human preferences","author":"christiano","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref13","first-page":"8011","article-title":"Reward learning from human preferences and demonstrations in Atari","author":"ibarz","year":"2018","journal-title":"Advances in neural information processing systems"},{"article-title":"Reward shaping via meta-learning","year":"2019","author":"zou","key":"ref14"},{"journal-title":"Reinforcement Learning An Introduction","year":"2011","author":"sutton","key":"ref15"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50030-1"},{"key":"ref18","first-page":"aai3130966","article-title":"Theory and Application of Reward Shaping in Reinforcement Learning","author":"laud","year":"2004","journal-title":"Ph D Dissertation"},{"key":"ref19","first-page":"604","article-title":"Potential-based Shaping in Model-based Reinforcement 
Learning","author":"asmuth","year":"2008","journal-title":"AAAI"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref4","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","volume":"70","author":"finn","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"key":"ref27","first-page":"1433","article-title":"Maximum Entropy Inverse Reinforcement Learning","volume":"3","author":"ziebart","year":"2008","journal-title":"Proceedings of the 23rd National Conference on Artificial Intelligence"},{"key":"ref3","first-page":"3630","article-title":"Matching networks for one shot learning","author":"vinyals","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref6","first-page":"1123","article-title":"Potential-based reward shaping for POMDPs","author":"eck","year":"2013","journal-title":"Proceedings of the 2013 International Conference on Autonomous Agents and Multi-agent Systems"},{"key":"ref29","first-page":"429","article-title":"Learning from demonstration for shaping through inverse reinforcement learning","author":"suay","year":"2016","journal-title":"Proceedings of the 2016 International Conference on Autonomous Agents & Multiagent Systems"},{"key":"ref5","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref8","first-page":"565","article-title":"Reward shaping in episodic reinforcement learning","author":"grze\u015b","year":"2017","journal-title":"Proceedings of the Autonomous Agents and Multi Agents System Conference"},{"article-title":"A new potential-based reward shaping for reinforcement learning agent","year":"2019","author":"badnava","key":"ref7"},{"key":"ref2","first-page":"1842","article-title":"Meta-learning with memory-augmented neural 
networks","author":"santoro","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref9","first-page":"2067","article-title":"Trial without error: Towards safe reinforcement learning via human intervention","author":"saunders","year":"2018","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"article-title":"Meta-learning: A survey","year":"2018","author":"vanschoren","key":"ref1"},{"article-title":"Off-policy shaping ensembles in reinforcement learning","year":"2014","author":"harutyunyan","key":"ref20"},{"key":"ref22","first-page":"433","article-title":"Dynamic potential-based reward shaping","author":"devlin","year":"2012","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref21","first-page":"792","article-title":"Principled methods for advising reinforcement learning agents","author":"wiewiora","year":"2003","journal-title":"Proceedings of the 20th International Conference on Machine Learning (ICML-03)"},{"key":"ref24","article-title":"Reinforcement learning from demonstration through shaping","author":"brys","year":"2015","journal-title":"Twenty-Fourth International Joint Conference on Artificial Intelligence"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11741","article-title":"Belief reward shaping in reinforcement learning","author":"marom","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref26","first-page":"2","article-title":"Algorithms for inverse reinforcement learning","volume":"1","author":"ng","year":"2000","journal-title":"ICML"},{"article-title":"Dueling network architectures for deep reinforcement learning","year":"2015","author":"wang","key":"ref25"}],"event":{"name":"2020 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2020,7,19]]},"location":"Glasgow, United 
Kingdom","end":{"date-parts":[[2020,7,24]]}},"container-title":["2020 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9200848\/9206590\/09207388.pdf?arnumber=9207388","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,8]],"date-time":"2023-10-08T17:40:27Z","timestamp":1696786827000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9207388\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/ijcnn48605.2020.9207388","relation":{},"subject":[],"published":{"date-parts":[[2020,7]]}}}