{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T06:46:28Z","timestamp":1730270788764,"version":"3.28.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,1]]},"DOI":"10.1109\/iros55552.2023.10342114","type":"proceedings-article","created":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T14:17:55Z","timestamp":1702477075000},"page":"9480-9487","source":"Crossref","is-referenced-by-count":0,"title":["On-Robot Bayesian Reinforcement Learning for POMDPs"],"prefix":"10.1109","author":[{"given":"Hai","family":"Nguyen","sequence":"first","affiliation":[{"name":"Khoury College of Computer Sciences, Northeastern University,Boston,MA,USA,02115"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sammie","family":"Katt","sequence":"additional","affiliation":[{"name":"Khoury College of Computer Sciences, Northeastern University,Boston,MA,USA,02115"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuchen","family":"Xiao","sequence":"additional","affiliation":[{"name":"Khoury College of Computer Sciences, Northeastern University,Boston,MA,USA,02115"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christopher","family":"Amato","sequence":"additional","affiliation":[{"name":"Khoury College of Computer Sciences, Northeastern University,Boston,MA,USA,02115"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref2","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"International conference on machine learning","author":"Mnih","year":"2016"},{"journal-title":"Proximal policy optimization algorithms","year":"2017","author":"Schulman","key":"ref3"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.048"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref6","first-page":"723","article-title":"Baddr: Bayes-adaptive deep dropout rl for pomdps","volume-title":"Proceedings of the 21 st International Conference on Autonomous Agents and Multiagent Systems","author":"Katt","year":"2022"},{"issue":"5","key":"ref7","article-title":"A bayesian approach for learning and planning in partially observable markov decision processes","volume":"12","author":"Ross","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2009.v.026"},{"key":"ref9","first-page":"1819","article-title":"Learning in pomdps with monte carlo tree search","volume-title":"International Conference on Machine Learning","author":"Katt","year":"2017"},{"key":"ref10","first-page":"7","article-title":"Bayesian reinforcement learning in factored pomdps","volume-title":"Proceedings of the 18th International Conference on Autonomous Agents and MultiAgent Systems","author":"Katt","year":"2019"},{"key":"ref11","article-title":"Monte-carlo planning in large pomdps","volume":"23","author":"Silver","year":"2010","journal-title":"Advances in neural information processing systems"},{"issue":"1","key":"ref12","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"The journal of machine learning research"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.073"},{"journal-title":"A framework for efficient robotic manipulation","year":"2020","author":"Zhan","key":"ref15"},{"journal-title":"Representation learning with contrastive predictive coding","year":"2018","author":"Oord","key":"ref16"},{"key":"ref17","article-title":"On-robot learning with equivariant models","volume-title":"Conference on robot learning","author":"Wang","year":"2022"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2022.XVIII.071"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.15607\/rss.2023.xix.056"},{"journal-title":"Day-dreamer: World models for physical robot learning","year":"2022","author":"Wu","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/S0004-3702(98)00023-X"},{"volume-title":"Reinforcement Learning: An Introduction","year":"2018","author":"Sutton","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref24","first-page":"1050","article-title":"Dropout as a Bayesian approximation: Representing model uncertainty in deep learning","volume-title":"International conference on machine learning","author":"Gal","year":"2016"},{"key":"ref25","article-title":"Openrave: A planning architecture for autonomous robotics","volume":"79","author":"Diankov","year":"2008","journal-title":"Robotics Institute, Pittsburgh, PA, Tech. Rep. CMU-RI-TR-08\u201334"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2012.2205651"},{"issue":"3.2","key":"ref27","first-page":"5","article-title":"Ros: an open-source robot operating system","volume-title":"ICRA workshop on open source software","volume":"3","author":"Quigley","year":"2009"},{"key":"ref28","article-title":"Deep recurrent q-learning for partially observable mdps","volume-title":"2015 aaai fall symposium series","author":"Hausknecht","year":"2015"},{"journal-title":"Discriminative particle filter reinforcement learning for complex partial observations","year":"2020","author":"Ma","key":"ref29"},{"journal-title":"Mastering atari with discrete world models","year":"2020","author":"Hafner","key":"ref30"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"ref32","article-title":"Deep learning for real-time atari game play using offline monte-carlo tree search planning","volume":"27","author":"Guo","year":"2014","journal-title":"Advances in neural information processing systems"}],"event":{"name":"2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","start":{"date-parts":[[2023,10,1]]},"location":"Detroit, MI, USA","end":{"date-parts":[[2023,10,5]]}},"container-title":["2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10341341\/10341342\/10342114.pdf?arnumber=10342114","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,19]],"date-time":"2023-12-19T19:15:09Z","timestamp":1703013309000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10342114\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,1]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/iros55552.2023.10342114","relation":{},"subject":[],"published":{"date-parts":[[2023,10,1]]}}}