{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T16:37:06Z","timestamp":1757608626852,"version":"3.44.0"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000185","name":"DARPA","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11128459","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"13399-13405","source":"Crossref","is-referenced-by-count":0,"title":["SHIRE: Enhancing Sample Efficiency using Human Intuition in REinforcement Learning"],"prefix":"10.1109","author":[{"given":"Amogh","family":"Joshi","sequence":"first","affiliation":[{"name":"Purdue University,West Lafayette,IN,USA,47907"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adarsh","family":"Kosta","sequence":"additional","affiliation":[{"name":"Purdue University,West Lafayette,IN,USA,47907"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kaushik","family":"Roy","sequence":"additional","affiliation":[{"name":"Purdue University,West Lafayette,IN,USA,47907"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00108"},{"volume-title":"Himode: A hybrid monocular omnidirectional depth estimation model","year":"2022","author":"Junayed","key":"ref2"},{"volume-title":"Panoformer: Panorama transformer for indoor 360 depth estimation","year":"2022","author":"Shen","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00030"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811821"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00901"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"668","DOI":"10.1007\/978-3-031-19790-1_40","article-title":"Flowformer: A transformer architecture for optical flow","volume-title":"Computer Vision- ECCV 2022: 17th European Conference","author":"Huang","year":"2022"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00963"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2105.15203"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2965415"},{"volume-title":"Real-time neuromorphic navigation: Integrating event-based vision and physics-driven planning on a parrot bebop2 quadrotor","year":"2024","author":"Joshi","key":"ref12"},{"volume-title":"Tracking large-scale ai models","year":"2024","author":"Rahman","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3458723"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-3020"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2019.xv.011"},{"key":"ref18","article-title":"(more) efficient reinforcement learning via posterior sampling","volume":"26","author":"Osband","year":"2013","journal-title":"Advances in Neural Information Processing Systems"},{"article-title":"Exploiting causal graph priors with posterior sampling for reinforcement learning","volume-title":"The Twelfth International Conference on Learning Representations","author":"Mutti","key":"ref19"},{"article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"International Conference on Learning Representations","author":"Hafner","key":"ref20"},{"article-title":"Model based reinforcement learning for atari","volume-title":"International Conference on Learning Representations","author":"Kaiser","key":"ref21"},{"key":"ref22","article-title":"When to trust your model: Model-based policy optimization","volume":"32","author":"Janner","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1080\/1047840X.2010.524469"},{"key":"ref24","article-title":"Combining policy gradient and q-learning","author":"O\u2019Donoghue","year":"2016","journal-title":"arXiv preprint"},{"key":"ref25","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume-title":"International conference on machine learning","author":"Haarnoja"},{"key":"ref26","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning","author":"Haarnoja"},{"key":"ref27","first-page":"465","article-title":"Pilco: A model-based and data-efficient approach to policy search","volume-title":"Proceedings of the 28th International Conference on machine learning (ICML-11)","author":"Deisenroth"},{"issue":"34","key":"ref28","first-page":"25","article-title":"Improving pilco with bayesian neural network dynamics models","volume":"4","author":"Gal","year":"2016","journal-title":"Data-efficient machine learning workshop, ICML"},{"key":"ref29","article-title":"Deep rein-forcement learning in a handful of trials using probabilistic dynamics models","volume":"31","author":"Chua","year":"2018","journal-title":"Advances in neural information processing systems"},{"key":"ref30","article-title":"Efficient reinforcement learning in factored mdps with application to constrained rl","author":"Chen","year":"2020","journal-title":"arXiv preprint"},{"key":"ref31","article-title":"Near-optimal reinforcement learning in factored mdps","volume":"27","author":"Osband","year":"2014","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/347476.347480"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1613\/jair.714"},{"volume-title":"Proximal policy optimization","year":"2024","key":"ref34"},{"volume-title":"Deep q-network","year":"2024","key":"ref35"},{"journal-title":"Probabilistic graphical models: principles and techniques","year":"2009","author":"Koller","key":"ref36"},{"volume-title":"Lunar lander environment","year":"2024","key":"ref37"},{"volume-title":"Efficient memory-based learning for robot control","year":"1990","author":"Moore","key":"ref38"},{"key":"ref39","article-title":"Reinforcement learning using neural networks, with applications to motor control","volume-title":"Institut National Polytechnique de Grenoble-INPG","author":"Coulom","year":"2002"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2025,5,19]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11128459.pdf?arnumber=11128459","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:07:55Z","timestamp":1756879675000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11128459\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11128459","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}