{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T08:44:21Z","timestamp":1751013861802,"version":"3.41.0"},"reference-count":38,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.1109\/ijcnn.2017.7965896","type":"proceedings-article","created":{"date-parts":[[2017,7,10]],"date-time":"2017-07-10T21:41:30Z","timestamp":1499722890000},"page":"510-517","source":"Crossref","is-referenced-by-count":16,"title":["Deep reward shaping from demonstrations"],"prefix":"10.1109","author":[{"given":"Ahmed","family":"Hussein","sequence":"first","affiliation":[]},{"given":"Eyad","family":"Elyan","sequence":"additional","affiliation":[]},{"given":"Mohamed Medhat","family":"Gaber","sequence":"additional","affiliation":[]},{"given":"Chrisina","family":"Jayne","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-44188-7_1"},{"key":"ref33","first-page":"1","article-title":"Guided policy search","author":"levine","year":"2013","journal-title":"Proceedings of the 30th International Conference on Machine Learning"},{"key":"ref32","first-page":"26","article-title":"Reinforcement learning from demonstration through shaping","author":"brys","year":"2015","journal-title":"Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)"},{"key":"ref31","first-page":"1890","article-title":"Imitation learning with demonstrations and shaping rewards","author":"judah","year":"2014","journal-title":"AAAI"},{"key":"ref30","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","volume":"99","author":"ng","year":"1999","journal-title":"ICML"},{"key":"ref37","first-page":"1007","article-title":"Internal rewards mitigate agent boundedness","author":"sorg","year":"2010","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)"},{"journal-title":"Deep learning for reward design to improve monte carlo tree search in atari games","year":"2016","author":"guo","key":"ref36"},{"journal-title":"Maximum entropy deep inverse reinforcement learning","year":"2015","author":"wulfmeier","key":"ref35"},{"key":"ref34","first-page":"1766","article-title":"Training deep convolutional neural networks to play go","author":"clark","year":"2015","journal-title":"Proceedings of the 32nd International Conference on Machine Learning (ICML-15)"},{"journal-title":"Deep reinforcement learning from self-play in imperfect-information games","year":"2016","author":"heinrich","key":"ref10"},{"journal-title":"Target-driven visual navigation in indoor scenes using deep reinforcement learning","year":"2016","author":"zhu","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"2025","DOI":"10.1145\/2207676.2208350","article-title":"I did that! measuring users' experience of agency in their own actions","author":"coyle","year":"2012","journal-title":"Proceedings of the SIGCHI conference on Human factors in computing systems"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1145\/191666.191703","article-title":"Computers are social actors","author":"nass","year":"1994","journal-title":"Proceedings of the SIGCHI conference on Human factors in computing systems"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2012.6491170"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-247-2.50055-3"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"363","DOI":"10.1007\/11552246_35","article-title":"Autonomous inverted helicopter flight via reinforcement learning","author":"ng","year":"2006","journal-title":"Experimental Robotics IX"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2008.IV.034"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1329125.1329407"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(94)90047-7"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2011.2181676"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992699"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1228716.1228751"},{"journal-title":"Asynchronous methods for deep reinforcement learning","year":"2016","author":"mnih","key":"ref6"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-335-6.50030-1"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"journal-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref8"},{"journal-title":"End-to-end training of deep visuomotor policies","year":"2015","author":"levine","key":"ref7"},{"key":"ref2","first-page":"1","article-title":"An application of reinforcement learning to aerobatic helicopter flight","volume":"19","author":"abbeel","year":"2007","journal-title":"Advances in neural information processing systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jamda.2010.10.002"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2007.4399220"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1121241.1121263"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1329125.1329407"},{"journal-title":"Prioritized experience replay","year":"2015","author":"schaul","key":"ref24"},{"journal-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref23"},{"journal-title":"Recurrent deep q-learning for pac-man","year":"0","author":"ranjan","key":"ref26"},{"key":"ref25","first-page":"3338","article-title":"Deep learning for real-time atari game play using offline monte-carlo tree search planning","author":"guo","year":"2014","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2017 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2017,5,14]]},"location":"Anchorage, AK, USA","end":{"date-parts":[[2017,5,19]]}},"container-title":["2017 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7958416\/7965814\/07965896.pdf?arnumber=7965896","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,21]],"date-time":"2025-06-21T20:30:06Z","timestamp":1750537806000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7965896\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/ijcnn.2017.7965896","relation":{},"subject":[],"published":{"date-parts":[[2017,5]]}}}