{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T05:26:49Z","timestamp":1730266009180,"version":"3.28.0"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9533401","type":"proceedings-article","created":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T17:27:41Z","timestamp":1632158861000},"page":"1-9","source":"Crossref","is-referenced-by-count":0,"title":["Reward Shaping with Dynamic Trajectory Aggregation"],"prefix":"10.1109","author":[{"given":"Takato","family":"Okudo","sequence":"first","affiliation":[]},{"given":"Seiji","family":"Yamada","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1190"},{"key":"ref32","first-page":"3540","article-title":"Feudal networks for hierarchical reinforcement learning","volume":"70","author":"vezhnevets","year":"0","journal-title":"Proceedings of the 34th International Conference on Machine Learning"},{"key":"ref31","first-page":"1633","article-title":"Transfer learning for reinforcement learning domains: A survey","volume":"10","author":"taylor","year":"2009","journal-title":"Journal of Machine Learning Research"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/817"},{"key":"ref35","first-page":"1433","article-title":"Maximum entropy inverse reinforcement learning","volume":"3","author":"ziebart","year":"0","journal-title":"Proceedings of the 23rd National Conference on Artificial Intelligence"},{"key":"ref34","article-title":"Principled methods for advising reinforcement learning agents","author":"wiewiora","year":"0","journal-title":"Proceedings of the 20th International Conference on Machine Learning"},{"key":"ref10","first-page":"3504","author":"gao","year":"2015","journal-title":"Potential based reward shaping for hierarchical reinforcement learning"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2010.01.001"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87536-9_37"},{"key":"ref13","first-page":"1913","article-title":"Shaping mario with human advice","author":"harutyunyan","year":"2015","journal-title":"Proceedings of the 2015 International Conference on Autonomous Agents and Multiagent Systems AAMAS 2015"},{"key":"ref14","first-page":"2652","author":"harutyunyan","year":"2015","journal-title":"Expressing arbitrary reward functions as potential-based advice"},{"key":"ref15","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"ho","year":"2016","journal-title":"Advances in Neural Information Processing Systems 29 Curran Associates Inc"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/1597735.1597738"},{"key":"ref17","first-page":"380","article-title":"Value function approximation in reinforcement learning using the Fourier basis","author":"konidaris","year":"0","journal-title":"Proceedings of the 25th AAAI Conference on Artificial Intelligence"},{"key":"ref18","first-page":"1015","article-title":"Skill discovery in continuous reinforcement learning domains using skill chaining","volume":"22","author":"konidaris","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/THMS.2019.2912447"},{"journal-title":"Reinforcement Learning An Introduction","year":"2018","author":"sutton","key":"ref28"},{"key":"ref4","first-page":"1726","article-title":"The option-critic architecture","author":"bacon","year":"0","journal-title":"Proceedings of the 31st AAAI Conference on Artificial Intelligence"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/534"},{"key":"ref3","first-page":"2935","article-title":"Playing hard exploration games by watching youtube","author":"aytar","year":"2018","journal-title":"ser NIPS'18 Curran Associates Inc"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1080\/09540091.2018.1443318"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(99)00052-1"},{"journal-title":"OpenAI Gym","year":"2016","author":"brockman","key":"ref5"},{"key":"ref8","first-page":"433","article-title":"Dynamic potential-based reward shaping","author":"devlin","year":"0","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref7","first-page":"1922","article-title":"Landmark based reward shaping in reinforcement learning with hidden states","author":"demir","year":"0","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref2","first-page":"105","author":"amershi","year":"2014","journal-title":"Power to the people The role of humans in interactive machine learning"},{"journal-title":"OpenAI Baselines","year":"2017","author":"dhariwal","key":"ref9"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref20","first-page":"1409","article-title":"Hierarchical reinforcement learning with advantage-based auxiliary rewards","volume":"32","author":"li","year":"2019","journal-title":"Advances in neural information processing systems"},{"journal-title":"Interpretable Machine Learning","year":"2019","author":"molnar","key":"ref22"},{"key":"ref21","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"ICLR"},{"key":"ref24","first-page":"663","article-title":"Algorithms for inverse reinforcement learning","author":"ng","year":"0","journal-title":"Proceedings of the 17th International Conference on Machine Learning"},{"key":"ref23","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","author":"ng","year":"0","journal-title":"Proceedings of the 16th International Conference on Machine Learning"},{"journal-title":"Multi-goal reinforcement learning Challenging robotics environments and request for research","year":"2018","author":"plappert","key":"ref26"},{"key":"ref25","first-page":"8411","article-title":"Learning from trajectories via subgoal discovery","volume":"32","author":"paul","year":"2019","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09533401.pdf?arnumber=9533401","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T11:45:52Z","timestamp":1652183152000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9533401\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9533401","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}