{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T14:38:07Z","timestamp":1730299087595,"version":"3.28.0"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1109\/ssrr.2019.8848957","type":"proceedings-article","created":{"date-parts":[[2019,9,26]],"date-time":"2019-09-26T21:50:58Z","timestamp":1569534658000},"page":"15-21","source":"Crossref","is-referenced-by-count":3,"title":["Sample Efficient Reinforcement Learning for Navigation in Complex Environments"],"prefix":"10.1109","author":[{"given":"Barzin","family":"Moridian","sequence":"first","affiliation":[]},{"given":"Brian R.","family":"Page","sequence":"additional","affiliation":[]},{"given":"Nina","family":"Mahmoudian","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Adam A method for stochastic optimization","year":"2014","author":"kingma","key":"ref39"},{"key":"ref38","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref33","first-page":"5055","article-title":"Hindsight experience replay","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"journal-title":"Dueling network architectures for deep reinforcement learning","year":"2015","author":"wang","key":"ref32"},{"key":"ref31","first-page":"2094","article-title":"Deep reinforcement learning with double Q-learning","author":"van hasselt","year":"2016","journal-title":"AAAI"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.329"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref35","first-page":"1","article-title":"Guided policy search","author":"levine","year":"2013","journal-title":"Proceedings of the 30th International Conference on Machine Learning (ICML-13)"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202134"},{"journal-title":"Deep reinforcement learning An overview","year":"2017","author":"li","key":"ref10"},{"journal-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref11"},{"key":"ref12","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","author":"mnih","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref13","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"Proceedings of the 32nd International Conference on Machine Learning (ICML-15)"},{"key":"ref14","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"journal-title":"Continuous control with deep reinforcement learning","year":"2015","author":"lillicrap","key":"ref15"},{"key":"ref16","first-page":"2829","article-title":"Continuous deep Q-learning with model-based acceleration","author":"gu","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref17","first-page":"2154","article-title":"Value iteration networks","author":"tamar","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref18","first-page":"2616","article-title":"Cog-nitive mapping and planning for visual navigation","author":"gupta","year":"2017","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref4","first-page":"1","article-title":"Robotic mapping: A survey","volume":"1","author":"thrun","year":"2002","journal-title":"Exploring Artificial Intelligence in the New Millennium"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460487"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-012-9365-8"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511546877"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989379"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.1049"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1177\/1729881416669482"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2016.2624754"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0278364906065387"},{"key":"ref9","volume":"1","author":"sutton","year":"1998","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2005.844673"},{"journal-title":"Learning to navigate in complex environments","year":"2016","author":"mirowski","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461076"},{"journal-title":"Emergence of locomotion behaviours in rich environments","year":"2017","author":"heess","key":"ref21"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2015.2509024"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1002\/rob.20276"},{"key":"ref26","first-page":"6","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","author":"ross","year":"2011","journal-title":"AISTATS"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630809"}],"event":{"name":"2019 IEEE International Symposium on Safety, Security, and Rescue Robotics (SSRR)","start":{"date-parts":[[2019,9,2]]},"location":"W\u00fcrzburg, Germany","end":{"date-parts":[[2019,9,4]]}},"container-title":["2019 IEEE International Symposium on Safety, Security, and Rescue Robotics (SSRR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8843552\/8848928\/08848957.pdf?arnumber=8848957","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,19]],"date-time":"2022-07-19T16:26:06Z","timestamp":1658247966000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8848957\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,9]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/ssrr.2019.8848957","relation":{},"subject":[],"published":{"date-parts":[[2019,9]]}}}